Example 1
        def svd(user_id, area):
            algo = SVDpp(n_factors=100, n_epochs=15)
            # 3. save the trained model (train once), then load it here
            file_name = os.path.expanduser('./dump')
            # dump.dump(file_name, algo=algo)  # train once, then keep this line commented out
            _, algo = dump.load(file_name)

            Area = pd.read_csv('./area.csv')  # columns: product id (training data), area, productID

            # nowarea = "C"
            # user = str("A2CX7LUOHB2NDG")  # incoming user ID
            neww = Area[Area['area'] == area]['productID'].tolist()  # products in the requested area
            predictions = [
                algo.predict(str(user_id), str(productID))
                for productID in neww
            ]  # predict a rating for each candidate product

            def sortkey_est(pred):
                return pred.est

            predictions.sort(key=sortkey_est, reverse=True)
            # print(predictions)
            top_product_id = [int(pred.iid) for pred in predictions]
            top_product_id = top_product_id[:5]
            return top_product_id
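Example 1 loads a model that must already exist at ./dump. A minimal sketch of that one-off training step, assuming a hypothetical ratings.csv with user_id, productID and rating columns on a 1-5 scale (the file and column names are assumptions, not taken from the example):

import os

import pandas as pd
from surprise import Dataset, Reader, SVDpp, dump

# Hypothetical ratings file with columns: user_id, productID, rating (1-5 scale).
ratings = pd.read_csv('./ratings.csv')
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['user_id', 'productID', 'rating']], reader)

algo = SVDpp(n_factors=100, n_epochs=15)
algo.fit(data.build_full_trainset())

# Persist the fitted model so svd() can later restore it with dump.load('./dump').
dump.dump(os.path.expanduser('./dump'), algo=algo)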
Example 2
def recommend_collaborative_implicit():
    if request.method == 'GET':
        # try:
        db = getDb()
        if (db):
            collaboratives = db.collaboratives
            customer_id = request.args.get('customer_id', default='')
            top = request.args.get('top', default='')
            user_id = request.args.get('user_id', default='')
            data = pd.DataFrame(
                list(
                    collaboratives.find({
                        'customer': ObjectId(customer_id),
                        'explicit': False
                    })))
            data = data[['userId', 'itemId', 'feedBack']]
            data = data.rename(columns={'userId': 'user', 'itemId': 'item'})
            data['user'] = data['user'].astype("category")
            data['item'] = data['item'].astype("category")

            # .cat.codes creates a categorical id for the users and items
            data['user_id'] = data['user'].cat.codes
            data['item_id'] = data['item'].cat.codes
            sparse_item_user = sparse.csr_matrix(
                (data['feedBack'].astype(float), (data['item_id'],
                                                  data['user_id'])))
            sparse_user_item = sparse.csr_matrix(
                (data['feedBack'].astype(float), (data['user_id'],
                                                  data['item_id'])))
            user_ids = data[data['user'] == user_id].iloc[0]['user_id']

            _, model = dump.load('models/' + customer_id +
                                 '_collaborative_implicit')

            recommended = model.recommend(user_ids,
                                          sparse_user_item,
                                          N=int(top),
                                          filter_already_liked_items=False)
            result = []
            print('rec', recommended)
            for item in recommended:
                idx, score = item
                print('err', data[data.item_id == idx])
                result.append({
                    'item_id':
                    str(data.item.loc[data.item_id == idx].iloc[0]),
                    'score':
                    str(score)
                })
            return {
                'data': {
                    'current_user': {
                        'id': str(user_id),
                    },
                    'suggestion': result,
                    'top': top
                }
            }
        else:
            return "Database not found"
def recommend_from_param():
    # TODO: add algorithm as a parameter
    userId = request.args['userid']
    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()
    _, loaded_algo = dump.load(os.path.expanduser('./SVD_model_couchDB'))
    print("file loaded")

    predictions_loaded_algo = loaded_algo.test(trainset.build_testset())
    recs = get_top_n(predictions_loaded_algo, 10)[int(userId)]
    print(recs)
    response_list = {
        'source': {
            "id": "SVD_model"
        },
        'movieIds': [],
        'predictedRatings': []
    }

    for i in range(10):
        response = {'movieId': recs[i][0], 'predicted rating': recs[i][1]}
        response_list['predictedRatings'].append(response)
        response_list['movieIds'].append(recs[i][0])
        print(response_list)
    return jsonify(response_list)
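Several of these snippets call a get_top_n helper that is not shown. A sketch of the usual implementation, following the pattern in Surprise's documentation (the exact helper used by these projects may differ):

from collections import defaultdict


def get_top_n(predictions, n=10):
    """Return the top-n (item id, estimated rating) pairs for each user."""
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
    # Sort each user's candidates by estimated rating and keep the n best.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n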
def get_similar_items(item_name, n_similar_items=5):
    """
    Get Similar Items predicted by model.

    Parameters
    ----------
        item_name: name of the selected product.
        n_similar_items: number of similar products required, default=5.

    Returns
    -------
        Similar items list.
    """
    _, algo = load("backend/models/similar_items_algo.pkl")

    inner_item_mapping = pd.read_sql_table("item_id_mapping",
                                           engine,
                                           index_col="index")
    inner_id = inner_item_mapping[inner_item_mapping["item_raw_id"] ==
                                  item_name]
    inner_id = int(inner_id["item_inner_id"])
    similar_item_ids = algo.get_neighbors(inner_id, k=n_similar_items)

    similar_items = [algo.trainset.to_raw_iid(ids) for ids in similar_item_ids]
    return similar_items
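get_similar_items expects an item_id_mapping table that pairs each raw item id with its Surprise inner id. A minimal sketch of how such a table could be written, assuming an already-fitted algorithm algo and a SQLAlchemy engine (both hypothetical here, not shown in the snippet):

import pandas as pd
# from sqlalchemy import create_engine
# engine = create_engine("sqlite:///backend/models/shop.db")  # hypothetical connection

# `algo` is assumed to be a fitted Surprise algorithm (e.g. KNNBaseline).
trainset = algo.trainset
inner_ids = list(trainset.all_items())
mapping = pd.DataFrame({
    "item_inner_id": inner_ids,
    "item_raw_id": [trainset.to_raw_iid(i) for i in inner_ids],
})
# The default integer index is written as the "index" column read back above.
mapping.to_sql("item_id_mapping", engine, if_exists="replace")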
Example 5
    def get_model(cls):
        """Get the model object for this instance, loading it if it's not already loaded."""
        if cls.model is None:
            # with open(os.path.join(model_path, 'model.pkl'), 'r') as inp:
            #     cls.model = pickle.load(inp)
            _, cls.model = dump.load(os.path.join(model_path, 'model.pkl'))
        return cls.model
Example 6
def cf_model_load(file_path):
    """
    :param file_path: path where the dumped data was saved
    :return:
    """
    # Assuming predictions were saved as well, this returns a (prediction, algo) tuple, matching the saved format
    return load(file_name=file_path)
def recommend_from_form():
    userId = request.form['userId']
    limit = int(request.form['limit'])
    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()
    _, loaded_algo = dump.load(os.path.expanduser('./SVD_model_couchDB'))
    print("file loaded")

    predictions_loaded_algo = loaded_algo.test(trainset.build_testset())
    recs = get_top_n(predictions_loaded_algo, limit)[int(userId)]
    response_list = {
        'source': {
            "id": "SVD_model"
        },
        'movieIds': [],
        'predictedRatings': []
    }
    i = 0

    while (len(response_list['predictedRatings']) < limit and i < len(recs)):
        response = {'movieId': recs[i][0], 'predicted rating': recs[i][1]}
        response_list['predictedRatings'].append(response)
        response_list['movieIds'].append(recs[i][0])
        print(response_list)
        i = i + 1
    return jsonify(response_list)
Example 8
def _compute_recommendations(trainset,
                             new_user_neighbor_raw_id,
                             n_recommendations=3):
    """
    Compute Top 5 Product Recommendations.

    Parameters
    ----------
        trainset: data object.
        new_user_neighbor_raw_id: str, Inner Id for the nearest neighbor.
        n_recommendations: int, Number of recommendations.

    Returns
    -------
        top 5 recommendations.
    """
    _, algo = dump.load("backend/models/user_predictions_algo.pkl")
    item_id_mapping = pd.read_sql_table("item_id_mapping",
                                        engine,
                                        index_col="index")

    predictions = {}
    for items in list(item_id_mapping["item_raw_id"]):
        # Prediction is a namedtuple (uid, iid, r_ui, est, details): key by the
        # first argument passed to predict() and store the estimated rating.
        x = algo.predict(items, new_user_neighbor_raw_id)
        predictions[x[0]] = x[3]

    predictions = pd.DataFrame(predictions.values(), predictions.keys())
    top_five_recommends = list(
        predictions.sort_values(0,
                                ascending=False).head(n_recommendations).index)
    return top_five_recommends
Example 9
def fetch_recommendations(n_lojas=3, n_ofertas=2):
    model = load('recomendacao_lojas')[1]
    lojas = pd.read_csv('lojas.csv')
    lojas['id'] = lojas.index
    ofertas_manuais = pd.read_csv('ofertas_manuais.csv')
    lojas_ids = lojas['id'].values
    ratings = []
    for i in range(0, len(lojas_ids)):
        prediction = model.predict(uid=0, iid=lojas_ids[i])
        ratings.append(prediction.est)
    lojas_escolhidas = lojas.sample(n_lojas, weights=np.array(ratings), axis=0)
    ofertas_totais = None
    for i in range(0, lojas_escolhidas.shape[0]):
        ofertas = ofertas_manuais[ofertas_manuais['lojas'] ==
                                  lojas_escolhidas.iloc[[i]]['id'].values[0]]
        if ofertas.shape[0] != 0:
            if ofertas.shape[0] >= n_ofertas:
                ofertas = ofertas.sample(n_ofertas, weights='priority', axis=0)
            if ofertas_totais is None:
                ofertas_totais = ofertas
            else:
                ofertas_totais = pd.concat(
                    [ofertas_totais.reset_index(drop=True), ofertas], axis=0)
        else:
            # TODO: handle the case where there are no manual offers
            pass
    return lojas_escolhidas.join(ofertas_totais.set_index('lojas'),
                                 lsuffix='_lojas',
                                 rsuffix='_ofertas',
                                 on='id')
def BaselineOnly_alg():
    print('Using BaselineOnly')
    _, alg = dump.load('BaselineOnly')
    predictions = alg.test(testset)
    print(accuracy.rmse(predictions))

    dump.dump('BSL_pred', predictions, alg)
Example 11
def get_user_recommend(ratings, movies, USER, filename):
    movies['genres'] = movies['genres'].fillna('[]').apply(literal_eval).apply(
        lambda x: [i['name'] for i in x] if isinstance(x, list) else [])
    movies['year'] = (pd.to_datetime(
        movies['release_date'], errors='coerce').apply(
            lambda x: str(x).split('-')[0] if pd.notna(x) else np.nan))
    movies.drop(movies.columns.difference(
        ['movieId', 'title', 'genres', 'year']),
                axis=1,
                inplace=True)
    movies.set_index('movieId', inplace=True)
    user_ratings = ratings[(ratings['userId'] == USER)]
    user_ratings = user_ratings.set_index('movieId')
    user_ratings = user_ratings.join(movies)
    user_ratings.drop(user_ratings.columns.difference(
        ['movieId', 'title', 'genres', 'year']),
                      axis=1,
                      inplace=True)
    movies_cut = movies[~movies.isin(user_ratings)].dropna()
    _, svd = dump.load(filename)
    user_predict = movies_cut.copy()
    user_predict = user_predict.reset_index()
    user_predict['Estimate_Score'] = user_predict['movieId'].apply(
        lambda x: svd.predict(USER, x).est)
    user_predict = user_predict.sort_values('Estimate_Score', ascending=False)
    return user_predict
Example 12
def test_dump():
    """Train an algorithm, compute its predictions then dump them.
    Ensure that the predictions that are loaded back are the correct ones, and
    that the predictions of the dumped algorithm are also equal to the other
    ones."""

    random.seed(0)

    train_file = os.path.join(os.path.dirname(__file__), './u1_ml100k_train')
    test_file = os.path.join(os.path.dirname(__file__), './u1_ml100k_test')
    data = Dataset.load_from_folds([(train_file, test_file)],
                                   Reader('ml-100k'))
    pkf = PredefinedKFold()

    trainset, testset = next(pkf.split(data))

    algo = BaselineOnly()
    algo.fit(trainset)
    predictions = algo.test(testset)

    with tempfile.NamedTemporaryFile() as tmp_file:
        dump.dump(tmp_file.name, predictions, algo)
        predictions_dumped, algo_dumped = dump.load(tmp_file.name)

        predictions_algo_dumped = algo_dumped.test(testset)
        assert predictions == predictions_dumped
        assert predictions == predictions_algo_dumped
Example 13
    def load_prev_colab_results(self, user_id):
        (_, algo_tuned) = dump.load('SVD_tuned.p')

        iid = self.df20['route_id'].unique()
        #user_id = 200128311 #mine, trad, alpine, intermediate
        #user_id = 110596403 #boulder-er
        #user_id = 200272475 #boulder-er, advanced
        #user_id = 200077815 #michaels, trad, alpine, intermediate
        #user_id = 106540415 #mixed climber, alpine climber, advanced
        # routes this user has already rated (to be excluded from prediction)
        iid_me = self.df20.loc[self.df20['user_id'] == user_id, 'route_id']
        iids_to_pred = np.setdiff1d(iid, iid_me)

        testset = [[user_id, iid, 2] for iid in iids_to_pred]
        predictions_tuned = algo_tuned.test(testset)
        pred_ratings_tuned = np.array([pred.est for pred in predictions_tuned])

        i_max = np.argpartition(pred_ratings_tuned, -20)[-20:]
        i_max = i_max[np.argsort(-pred_ratings_tuned[i_max])]
        iid = iids_to_pred[i_max]

        #top 20 recommended climbs
        self.df_top_climbs_mf = pd.DataFrame(iid, pred_ratings_tuned[i_max])
        self.df_top_climbs_mf = self.df_top_climbs_mf.reset_index()

        self.df_top_climbs_mf.columns = ['predicted rating', 'route id']
Example 14
def get_svd_recommender(df, test_size=0.25, path="", exists=False):
    """
    builds and trains an SVD recommender
    :param df: a dataframe containing user ID's, beer ID's and ratings
    :param test_size: the fraction of samples that should be reserved for testing
    :param path: the path to an existing svd recommender that was saved to a file
    :param exists: whether or not to upload the algo from a saved file
    :return: trained recommender, list of predictions, and the root mean square error of the recommender
    """
    if exists:
        return dump.load(path)[1]

    # allows surprise to read df
    reader = Reader(rating_scale=(1, 5))
    # must load in particular column order
    data = Dataset.load_from_df(df[['user_id', 'beer_id', 'user_score']],
                                reader)

    trainset, testset = train_test_split(data, test_size=test_size)
    algo = SVD()
    # Train the algorithm on the trainset
    algo.fit(trainset)
    # and predict ratings for the testset. test() returns a list of prediction objects
    # which have several attributes such as est (the prediction) and r_ui (the true rating)
    predictions = algo.test(testset)

    # rmse below 1 is considered low
    rmse = accuracy.rmse(predictions)
    mae = accuracy.mae(predictions)

    return algo, predictions, rmse
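A brief usage sketch for get_svd_recommender, assuming beer_df is a DataFrame with the three columns named above (the DataFrame and the user/beer ids are hypothetical):

# Train from scratch and inspect accuracy.
algo, predictions, rmse = get_svd_recommender(beer_df, test_size=0.2)
print(f"RMSE: {rmse:.3f}")

# Estimate a rating for one (user, beer) pair.
print(algo.predict("user_123", "beer_456").est)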
def svd4_user_movie_rate():
    """

    Returns
    -------返回训练好的基于user movie rating数据的svd模型,若没有训练好的模型存储,则触发模型训练&保存
    TYPE
        surprise.prediction_algorithms.algo_base.AlgoBase.
    TYPE
        surprise算法对象.

    """

    if os.path.exists(ALGO_RESULT_PATH + SVD_RESULT_USER2MOVIE):
        # dump.load returns (predictions, algo); take the algo
        return dump.load(ALGO_RESULT_PATH + SVD_RESULT_USER2MOVIE)[1]
    else:
        # read the data
        reader = Reader(line_format="user item rating")
        data = Dataset.load_from_df(dataprocess.create_user_movie_rate().
                                    loc[:, ["user", "movie", "rate"]],
                                    reader=reader)
        trainset = data.build_full_trainset()

        # set hyperparameters & initialize the model
        algo = SVD(n_epochs=N_EPOCHS_SVD, lr_all=LR_ALL_SVD, verbose=True)
        algo.fit(trainset)

        dump.dump(file_name=ALGO_RESULT_PATH + SVD_RESULT_USER2MOVIE,
                  algo=algo,
                  verbose=True)
        return algo
def knn_user_movie_rate():
    """

    Returns
    -------返回训练好的基于user movie rating数据的knnbaseline模型
    TYPE
        surprise.KNNbaseline object.

    """

    if os.path.exists(ALGO_RESULT_PATH + KNN_RESULT_USER2MOVIE):
        # dump.load returns (predictions, algo); take the algo
        return dump.load(ALGO_RESULT_PATH + KNN_RESULT_USER2MOVIE)[1]
    else:
        # read the data
        reader = Reader(line_format="user item rating", sep=",")
        data = Dataset.load_from_df(dataprocess.create_user_movie_rate().
                                    loc[:, ["user", "movie", "rate"]],
                                    reader=reader)
        trainset = data.build_full_trainset()

        # set hyperparameters & initialize the model
        sim_options = {'name': 'pearson', "user_based": False}
        algo = KNNBaseline(k=10, sim_options=sim_options)
        algo.fit(trainset)

        dump.dump(file_name=ALGO_RESULT_PATH + KNN_RESULT_USER2MOVIE,
                  algo=algo,
                  verbose=True)
        return algo
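Both helpers return a fitted Surprise algorithm, so a caller can request a single estimate directly. A minimal usage sketch (the user and movie ids are hypothetical):

algo = knn_user_movie_rate()
# est is the predicted rating for this (user, movie) pair.
est = algo.predict("user_1", "movie_42").est
print(est)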
Example 17
def predict_new_user(newUser, pathToPivotData, pathToModel):
    # load pivoted data
    dfPivot = pd.read_csv(pathToPivotData, index_col=0)
    
    # append the new user to the data (DataFrame.append was removed in pandas 2.0)
    dfPivot = pd.concat([dfPivot, pd.DataFrame(newUser, index=['-99'])])
    dfPivot = dfPivot.fillna(0)  # alternative: fill with dfPivot.mean(axis=0)
    
    # calculate distance to each existing user
    userDistance = {}
    for user in dfPivot.index:
        userDistance[user] = spatial.distance.euclidean(dfPivot.loc['-99'], dfPivot.loc[user])
    
    # get top n similar users (smallest distances), skipping the new user itself
    n = 20
    similarUsers = sorted(userDistance.items(), key=lambda x: x[1])[1:n + 1]
    similarUsersKeys = [key[0] for key in similarUsers]
    
    # load rs
    _, loaded_algo = dump.load(pathToModel)
    
    # get top movies for similar users
    preds = {}
    for user in similarUsersKeys:
        preds[user] = {}
        for movie in list(dfPivot):
            preds[user][movie] = loaded_algo.predict(uid=str(user), iid=str(movie)).est
    predsDf = pd.DataFrame.from_dict(preds)
    
    # get top movies from average from top movies for similar users
    # TODO: add distance as weighting
    recommendedMovies = predsDf.mean(axis=1).sort_values(ascending=False)[:20].to_dict()
    print(recommendedMovies)
    return recommendedMovies
Example 18
def test_dump_nothing():
    """Ensure that by default None objects are dumped."""
    with tempfile.NamedTemporaryFile() as tmp_file:
        dump.dump(tmp_file.name)
        predictions, algo = dump.load(tmp_file.name)
        assert predictions is None
        assert algo is None
Example 20
def retrieve():
    print("request received!")
    retrieve_request = request.get_json()
    userId = retrieve_request.get("userId")
    candidateIds = retrieve_request.get("candidateIds")
    excludeIds = retrieve_request.get("excludeIds")
    # offset = retrieve_request.get('offset')
    limit = retrieve_request.get("limit")
    # retrievalCriteria = retrieve_request.get('retrievalCriteria')

    # load data and model, get recommendations
    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()
    _, loaded_algo = dump.load(os.path.expanduser('./SVD_model_couchDB'))
    print("file loaded")
    predictions_loaded_algo = loaded_algo.test(trainset.build_testset())
    recs = get_all_recs(predictions_loaded_algo)[int(userId)]
    # recs = get_top_n(predictions_loaded_algo,int(limit))[int(userId)]
    response_list = {'source':{"id":"SVD_model"},'movieIds':[]}

    # Format recommendations
    for i in range(len(recs)):
        if len(response_list['movieIds']) >= int(limit):
            break
        if (recs[i][0] in candidateIds) and (recs[i][0] not in excludeIds):
            response_list['movieIds'].append(recs[i][0])
    return jsonify(response_list)
Example 21
def get_model():
    path = model_dump_path
    if os.path.isfile(path):
        _, loaded_algo = dump.load(path)
        return loaded_algo
    else:
        algo = train(path)
        return algo
Example 22
def recommend_collaborative_explicit():
    if request.method == 'GET':
        try:
            db = getDb()
            if (db):
                collaboratives = db.collaboratives
                customer_id = request.args.get('customer_id', default='')
                top = request.args.get('top', default='')
                user_id = request.args.get('user_id', default='')

                data = pd.DataFrame(
                    list(
                        collaboratives.find({
                            'customer': ObjectId(customer_id),
                            'explicit': True
                        })))
                data = data[['userId', 'itemId', 'feedBack']]
                data = data.rename(columns={
                    'userId': 'user',
                    'itemId': 'item',
                    'feedBack': 'rating'
                })
                # get the list of product ids
                iids = data['item'].unique()
                iids_user = data.loc[data['user'] == int(user_id), 'item']
                # remove the iids the user has already rated
                iids_to_pred = np.setdiff1d(iids, iids_user)
                testset = [[user_id, iid, 4.] for iid in iids_to_pred]
                _, loaded_algo = dump.load('models/' + customer_id +
                                           '_collaborative_explicit')

                predictions = loaded_algo.test(testset)
                pred_ratings = np.array([pred.est for pred in predictions])
                # i_max = pred_ratings.argmax()
                top_n = pred_ratings.argsort()[-int(top):][::-1]
                result = []

                for idx in top_n:
                    iid = iids_to_pred[idx]
                    result.append({
                        'itemId': iid,
                        'prediction_rating': pred_ratings[idx]
                    })
                return {
                    'data': {
                        'current_user': {
                            'id': user_id,
                        },
                        'suggestion': result,
                        'top': top
                    }
                }
            else:
                return "Database not found"
        except Exception as e:
            return "Error in " + str(e)
def SVD_alg():
    print('Using SVD')
    _, alg = dump.load('SVD')
    predictions = alg.test(testset)
    #pred = alg.predict(5,2)
    #print(pred)
    #print(predictions)
    print(accuracy.rmse(predictions))

    dump.dump('SVD_pred', predictions, alg)
    def get_serialize_algo(self, score_min=50):
        """
        Load the serialized algo.

        :param score_min:
        :return:
        """
        file_name = os.path.expanduser("./score" + str(score_min) + ".dump")
        _, algo = dump.load(file_name=file_name)
        return algo
Example 25
    def __init__(self, model_path):
        """Init RecSys
        Args:
            model_path (str): Model path
        """
        # load prediction and model from a given file
        self.predictions, self.model = dump.load(model_path)

        self.avg_recall = 0
        self.avg_precision = 0
Example 26
def get_oracle_labels_for_test_set(dataset_name, switch_ensemble):
    reader = Reader(line_format='user item rating timestamp', sep=',')
    train = Dataset.load_from_file("./created_data/" + dataset_name +
                                   "_train.csv",
                                   reader=reader)
    test_ensembles = Dataset.load_from_file("./created_data/" + dataset_name +
                                            "_test_ensembles.csv",
                                            reader=reader)

    uf = UserFeatures(
        pd.DataFrame(train.raw_ratings,
                     columns=["userId", "movieId", "rating", "timestamp"]),
        False)
    all_features_df = uf.get_all_user_features()

    recs_avg_errors = []
    for rs in RS:
        #Memory error for 16GB machine or float division error for lastfm
        if ("KNN" in rs["name"] and dataset_name in datasets_knn_mem_error):
            continue
        file_name = os.path.expanduser('./created_data/trained_RS/dump_file_' +
                                       dataset_name + '_' + rs["name"])
        _, loaded_algo = dump.load(file_name)

        predictions = loaded_algo.test(
            test_ensembles.build_full_trainset().build_testset())
        predictions_df = pd.DataFrame(
            predictions,
            columns=["userId", "movieId", "rating", "prediction", "details"])

        predictions_with_relevance = remove_dataset_bias(predictions_df,
                                                         has_ns=True)
        scores = predictions_with_relevance.groupby("userId").agg(
            lambda r, f=calculate_ndcg_score: f(r, "prediction"))
        scores = scores[[scores.columns[0]]].rename(index=str,
                                                    columns={
                                                        scores.columns[0]:
                                                        "NDCG"
                                                    }).reset_index()
        scores["RS"] = rs["name"]
        # this was used when mae was the criterion for creating the H1 dataset
        # predictions_df["error"] = abs(predictions_df["prediction"]-predictions_df["rating"])
        # avg_errors = predictions_df.groupby("userId")["error"].mean().rename("avg_error").to_frame().reset_index()
        # avg_errors["RS"] = rs["name"]

        recs_avg_errors.append(scores)

    all_avg_errors = pd.concat(recs_avg_errors).reset_index()
    assert not all_avg_errors.isnull().values.any()

    Xy = create_best_RS_userwise_dataset(all_avg_errors, all_features_df)
    if ("amazon" not in dataset_name):
        Xy["userId"] = Xy["userId"].astype(int)
    return Xy.sort_values("userId")[["userId", "label"]]
Example 27
    def load_recommender(self, recommender):
        '''
        Load pickled model from recommender directory

        Parameters
        ----------
        recommender: str
            - accepts 'knn' or 'svd' 
        '''
        if recommender == 'knn':
            _, model = load(RECOMMENDERS_DIR + 'knn_recommender.pickle')
        elif recommender == 'svd':
            _, model = load(RECOMMENDERS_DIR + 'mf_recommender.pickle')
        else:
            model = None

        self.model_type = recommender
        self.model = model

        self.set_data(self.model)
Example 28
def estimate():
    predictions_svd, algo_svd = dump.load('models/dump_SVD_test')
    precisions, recalls = precision_recall_at_k(predictions_svd, k=TOP_K, threshold=THRESHOLD)

    df_svd = pd.DataFrame(predictions_svd, columns=['uid', 'iid', 'rui', 'est', 'details'])
    df_svd['err'] = abs(df_svd.est - df_svd.rui)

    with open("estimation.txt", "w+") as f:
        f.write(f"SVD\n{df_svd.head()}\n")
        # Precision and recall can then be averaged over all users
        f.write(f"Precision: {sum(prec for prec in precisions.values()) / len(precisions)}\n")
        f.write(f"Recall: {sum(rec for rec in recalls.values()) / len(recalls)}\n")
Example 29
def load_model(path=None):
    '''Wrapper with logging. Initializes the main machine
    learning model.
    '''
    app.logger.debug('Loading model...')

    if path is None:
        path = settings.MODEL_PATH

    _, model = dump.load(path)
    app.logger.debug('Model has been loaded successfully')
    return model
Example 30
def init():
    # load the model from file into a global object
    global model

    # we assume that we have just one model
    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder
    # (./azureml-models/$MODEL_NAME/$VERSION)
    model_path = Model.get_model_path(
        os.getenv("AZUREML_MODEL_DIR").split('/')[-2])

    # model = joblib.load(model_path)
    model = dump.load(model_path)
    model = model[1]
Example 31
def train_SVD():
    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()
    algo = SVD()
    algo.fit(trainset)

    # Dump algorithm and reload it.
    file_name = os.path.expanduser('./SVD_model')
    dump.dump(file_name, algo=algo)
    print("file dumped")

    # Load a model:
    _, loaded_algo = dump.load('./SVD_model')
    print("file loaded")
    predictions_loaded_algo = loaded_algo.test(trainset.build_testset())
Example 32
def test_dump(u1_ml100k):
    """Train an algorithm, compute its predictions then dump them.
    Ensure that the predictions that are loaded back are the correct ones, and
    that the predictions of the dumped algorithm are also equal to the other
    ones."""

    random.seed(0)

    trainset, testset = next(PredefinedKFold().split(u1_ml100k))

    algo = BaselineOnly()
    algo.fit(trainset)
    predictions = algo.test(testset)

    with tempfile.NamedTemporaryFile() as tmp_file:
        dump.dump(tmp_file.name, predictions, algo)
        predictions_dumped, algo_dumped = dump.load(tmp_file.name)

        predictions_algo_dumped = algo_dumped.test(testset)
        assert predictions == predictions_dumped
        assert predictions == predictions_algo_dumped
Example 33
"""This module shows how an algorithm can be serialized with the dump module:
the SVD algorithm is trained on a dataset, dumped to a file, and is
then reloaded and can be used again for making predictions.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
import os

from surprise import SVD
from surprise import Dataset
from surprise import dump


data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()

algo = SVD()
algo.fit(trainset)

# Compute predictions of the 'original' algorithm.
predictions = algo.test(trainset.build_testset())

# Dump algorithm and reload it.
file_name = os.path.expanduser('~/dump_file')
dump.dump(file_name, algo=algo)
_, loaded_algo = dump.load(file_name)

# We now ensure that the algo is still the same by checking the predictions.
predictions_loaded_algo = loaded_algo.test(trainset.build_testset())
assert predictions == predictions_loaded_algo
print('Predictions are the same')
Example 34
    def load(cls, filename):
        _, algo = load(filename)
        return cls(algo)
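For context, a self-contained sketch of how such a wrapper class might pair the load classmethod with a matching save method (the class name AlgoWrapper and the algo attribute are assumptions, not taken from the example):

from surprise import dump


class AlgoWrapper:
    def __init__(self, algo):
        self.algo = algo

    @classmethod
    def load(cls, filename):
        # dump.load returns (predictions, algo); only the algo is kept.
        _, algo = dump.load(filename)
        return cls(algo)

    def save(self, filename):
        # Persist only the wrapped algorithm; load() restores it.
        dump.dump(filename, algo=self.algo)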