Example 1
        def svd(user_id, area):
            algo = SVDpp(n_factors=100, n_epochs=15)
            # 3. save the trained model (train once), then load it here
            file_name = os.path.expanduser('./dump')
            # dump.dump(file_name, algo=algo)  # train once, then keep this line commented out
            _, algo = dump.load(file_name)

            Area = pd.read_csv('./area.csv')  # columns: product id (training data), area, productID

            # nowarea = "C"
            # user = str("A2CX7LUOHB2NDG")  # incoming user ID
            neww = Area[Area['area'] == area]['productID'].tolist()  # products in the requested area
            predictions = [
                algo.predict(str(user_id), str(productID))
                for productID in neww
            ]  # predict a rating for each candidate product

            def sortkey_est(pred):
                return pred.est

            predictions.sort(key=sortkey_est, reverse=True)
            # print(predictions)
            top_product_id = [int(pred.iid) for pred in predictions]
            top_product_id = top_product_id[:5]
            return top_product_id
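Example 1 loads a model that must already exist at ./dump. A minimal sketch of that one-off training step, assuming a hypothetical ratings.csv with user_id, productID and rating columns on a 1-5 scale (the file and column names are assumptions, not taken from the example):

import os

import pandas as pd
from surprise import Dataset, Reader, SVDpp, dump

# Hypothetical ratings file with columns: user_id, productID, rating (1-5 scale).
ratings = pd.read_csv('./ratings.csv')
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['user_id', 'productID', 'rating']], reader)

algo = SVDpp(n_factors=100, n_epochs=15)
algo.fit(data.build_full_trainset())

# Persist the fitted model so svd() can later restore it with dump.load('./dump').
dump.dump(os.path.expanduser('./dump'), algo=algo)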
Example 2
def recommend_collaborative_implicit():
    if request.method == 'GET':
        # try:
        db = getDb()
        if (db):
            collaboratives = db.collaboratives
            customer_id = request.args.get('customer_id', default='')
            top = request.args.get('top', default='')
            user_id = request.args.get('user_id', default='')
            data = pd.DataFrame(
                list(
                    collaboratives.find({
                        'customer': ObjectId(customer_id),
                        'explicit': False
                    })))
            data = data[['userId', 'itemId', 'feedBack']]
            data = data.rename(columns={'userId': 'user', 'itemId': 'item'})
            data['user'] = data['user'].astype("category")
            data['item'] = data['item'].astype("category")

            # .cat.codes creates a categorical id for the users and items
            data['user_id'] = data['user'].cat.codes
            data['item_id'] = data['item'].cat.codes
            sparse_item_user = sparse.csr_matrix(
                (data['feedBack'].astype(float), (data['item_id'],
                                                  data['user_id'])))
            sparse_user_item = sparse.csr_matrix(
                (data['feedBack'].astype(float), (data['user_id'],
                                                  data['item_id'])))
            user_ids = data[data['user'] == user_id].iloc[0]['user_id']

            _, model = dump.load('models/' + customer_id +
                                 '_collaborative_implicit')

            recommended = model.recommend(user_ids,
                                          sparse_user_item,
                                          N=int(top),
                                          filter_already_liked_items=False)
            result = []
            print('rec', recommended)
            for item in recommended:
                idx, score = item
                print('err', data[data.item_id == idx])
                result.append({
                    'item_id':
                    str(data.item.loc[data.item_id == idx].iloc[0]),
                    'score':
                    str(score)
                })
            return {
                'data': {
                    'current_user': {
                        'id': str(user_id),
                    },
                    'suggestion': result,
                    'top': top
                }
            }
        else:
            return "Database not found"
def recommend_from_param():
    # TODO: add algorithm as a parameter
    userId = request.args['userid']
    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()
    _, loaded_algo = dump.load(os.path.expanduser('./SVD_model_couchDB'))
    print("file loaded")

    predictions_loaded_algo = loaded_algo.test(trainset.build_testset())
    recs = get_top_n(predictions_loaded_algo, 10)[int(userId)]
    print(recs)
    response_list = {
        'source': {
            "id": "SVD_model"
        },
        'movieIds': [],
        'predictedRatings': []
    }

    for i in range(10):
        response = {'movieId': recs[i][0], 'predicted rating': recs[i][1]}
        response_list['predictedRatings'].append(response)
        response_list['movieIds'].append(recs[i][0])
        print(response_list)
    return jsonify(response_list)
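Several of these snippets call a get_top_n helper that is not shown. A sketch of the usual implementation, following the pattern in Surprise's documentation (the exact helper used by these projects may differ):

from collections import defaultdict


def get_top_n(predictions, n=10):
    """Return the top-n (item id, estimated rating) pairs for each user."""
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
    # Sort each user's candidates by estimated rating and keep the n best.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n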
def get_similar_items(item_name, n_similar_items=5):
    """
    Get Similar Items predicted by model.

    Parameters
    ----------
        item_name: name of the selected product.
        n_similar_items: number of similar products required, default=5.

    Returns
    -------
        Similar items list.
    """
    _, algo = load("backend/models/similar_items_algo.pkl")

    inner_item_mapping = pd.read_sql_table("item_id_mapping",
                                           engine,
                                           index_col="index")
    inner_id = inner_item_mapping[inner_item_mapping["item_raw_id"] ==
                                  item_name]
    inner_id = int(inner_id["item_inner_id"])
    similar_item_ids = algo.get_neighbors(inner_id, k=n_similar_items)

    similar_items = [algo.trainset.to_raw_iid(ids) for ids in similar_item_ids]
    return similar_items
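get_similar_items expects an item_id_mapping table that pairs each raw item id with its Surprise inner id. A minimal sketch of how such a table could be written, assuming an already-fitted algorithm algo and a SQLAlchemy engine (both hypothetical here, not shown in the snippet):

import pandas as pd
# from sqlalchemy import create_engine
# engine = create_engine("sqlite:///backend/models/shop.db")  # hypothetical connection

# `algo` is assumed to be a fitted Surprise algorithm (e.g. KNNBaseline).
trainset = algo.trainset
inner_ids = list(trainset.all_items())
mapping = pd.DataFrame({
    "item_inner_id": inner_ids,
    "item_raw_id": [trainset.to_raw_iid(i) for i in inner_ids],
})
# The default integer index is written as the "index" column read back above.
mapping.to_sql("item_id_mapping", engine, if_exists="replace")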
Example 5
    def get_model(cls):
        """Get the model object for this instance, loading it if it's not already loaded."""
        if cls.model is None:
            # with open(os.path.join(model_path, 'model.pkl'), 'r') as inp:
            #     cls.model = pickle.load(inp)
            _, cls.model = dump.load(os.path.join(model_path, 'model.pkl'))
        return cls.model
Example 6
def cf_model_load(file_path):
    """
    :param file_path: path where the dumped data was saved
    :return:
    """
    # Assuming predictions were saved as well, this returns a (prediction, algo) tuple, matching the saved format
    return load(file_name=file_path)
def recommend_from_form():
    userId = request.form['userId']
    limit = int(request.form['limit'])
    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()
    _, loaded_algo = dump.load(os.path.expanduser('./SVD_model_couchDB'))
    print("file loaded")

    predictions_loaded_algo = loaded_algo.test(trainset.build_testset())
    recs = get_top_n(predictions_loaded_algo, limit)[int(userId)]
    response_list = {
        'source': {
            "id": "SVD_model"
        },
        'movieIds': [],
        'predictedRatings': []
    }
    i = 0

    while (len(response_list['predictedRatings']) < limit and i < len(recs)):
        response = {'movieId': recs[i][0], 'predicted rating': recs[i][1]}
        response_list['predictedRatings'].append(response)
        response_list['movieIds'].append(recs[i][0])
        print(response_list)
        i = i + 1
    return jsonify(response_list)
Example 8
def _compute_recommendations(trainset,
                             new_user_neighbor_raw_id,
                             n_recommendations=3):
    """
    Compute Top 5 Product Recommendations.

    Parameters
    ----------
        trainset: data object.
        new_user_neighbor_raw_id: str, Inner Id for the nearest neighbor.
        n_recommendations: int, Number of recommendations.

    Returns
    -------
        top 5 recommendations.
    """
    _, algo = dump.load("backend/models/user_predictions_algo.pkl")
    item_id_mapping = pd.read_sql_table("item_id_mapping",
                                        engine,
                                        index_col="index")

    predictions = {}
    for items in list(item_id_mapping["item_raw_id"]):
        # Prediction is a namedtuple (uid, iid, r_ui, est, details): key by the
        # first argument passed to predict() and store the estimated rating.
        x = algo.predict(items, new_user_neighbor_raw_id)
        predictions[x[0]] = x[3]

    predictions = pd.DataFrame(predictions.values(), predictions.keys())
    top_five_recommends = list(
        predictions.sort_values(0,
                                ascending=False).head(n_recommendations).index)
    return top_five_recommends
Example 9
def fetch_recommendations(n_lojas=3, n_ofertas=2):
    model = load('recomendacao_lojas')[1]
    lojas = pd.read_csv('lojas.csv')
    lojas['id'] = lojas.index
    ofertas_manuais = pd.read_csv('ofertas_manuais.csv')
    lojas_ids = lojas['id'].values
    ratings = []
    for i in range(0, len(lojas_ids)):
        prediction = model.predict(uid=0, iid=lojas_ids[i])
        ratings.append(prediction.est)
    lojas_escolhidas = lojas.sample(n_lojas, weights=np.array(ratings), axis=0)
    ofertas_totais = None
    for i in range(0, lojas_escolhidas.shape[0]):
        ofertas = ofertas_manuais[ofertas_manuais['lojas'] ==
                                  lojas_escolhidas.iloc[[i]]['id'].values[0]]
        if ofertas.shape[0] != 0:
            if ofertas.shape[0] >= n_ofertas:
                ofertas = ofertas.sample(n_ofertas, weights='priority', axis=0)
            if ofertas_totais is None:
                ofertas_totais = ofertas
            else:
                ofertas_totais = pd.concat(
                    [ofertas_totais.reset_index(drop=True), ofertas], axis=0)
        else:
            # TODO: handle the case where there are no manual offers
            pass
    return lojas_escolhidas.join(ofertas_totais.set_index('lojas'),
                                 lsuffix='_lojas',
                                 rsuffix='_ofertas',
                                 on='id')
def BaselineOnly_alg():
    print('Using BaselineOnly')
    _, alg = dump.load('BaselineOnly')
    predictions = alg.test(testset)
    print(accuracy.rmse(predictions))

    dump.dump('BSL_pred', predictions, alg)
Example 11
def get_user_recommend(ratings, movies, USER, filename):
    movies['genres'] = movies['genres'].fillna('[]').apply(literal_eval).apply(
        lambda x: [i['name'] for i in x] if isinstance(x, list) else [])
    movies['year'] = (pd.to_datetime(
        movies['release_date'], errors='coerce').apply(
            lambda x: str(x).split('-')[0] if pd.notna(x) else np.nan))
    movies.drop(movies.columns.difference(
        ['movieId', 'title', 'genres', 'year']),
                axis=1,
                inplace=True)
    movies.set_index('movieId', inplace=True)
    user_ratings = ratings[(ratings['userId'] == USER)]
    user_ratings = user_ratings.set_index('movieId')
    user_ratings = user_ratings.join(movies)
    user_ratings.drop(user_ratings.columns.difference(
        ['movieId', 'title', 'genres', 'year']),
                      axis=1,
                      inplace=True)
    movies_cut = movies[~movies.isin(user_ratings)].dropna()
    _, svd = dump.load(filename)
    user_predict = movies_cut.copy()
    user_predict = user_predict.reset_index()
    user_predict['Estimate_Score'] = user_predict['movieId'].apply(
        lambda x: svd.predict(USER, x).est)
    user_predict = user_predict.sort_values('Estimate_Score', ascending=False)
    return user_predict
Example 12
def test_dump():
    """Train an algorithm, compute its predictions then dump them.
    Ensure that the predictions that are loaded back are the correct ones, and
    that the predictions of the dumped algorithm are also equal to the other
    ones."""

    random.seed(0)

    train_file = os.path.join(os.path.dirname(__file__), './u1_ml100k_train')
    test_file = os.path.join(os.path.dirname(__file__), './u1_ml100k_test')
    data = Dataset.load_from_folds([(train_file, test_file)],
                                   Reader('ml-100k'))
    pkf = PredefinedKFold()

    trainset, testset = next(pkf.split(data))

    algo = BaselineOnly()
    algo.fit(trainset)
    predictions = algo.test(testset)

    with tempfile.NamedTemporaryFile() as tmp_file:
        dump.dump(tmp_file.name, predictions, algo)
        predictions_dumped, algo_dumped = dump.load(tmp_file.name)

        predictions_algo_dumped = algo_dumped.test(testset)
        assert predictions == predictions_dumped
        assert predictions == predictions_algo_dumped
Example 13
    def load_prev_colab_results(self, user_id):
        (_, algo_tuned) = dump.load('SVD_tuned.p')

        iid = self.df20['route_id'].unique()
        #user_id = 200128311 #mine, trad, alpine, intermediate
        #user_id = 110596403 #boulder-er
        #user_id = 200272475 #boulder-er, advanced
        #user_id = 200077815 #michaels, trad, alpine, intermediate
        #user_id = 106540415 #mixed climber, alpine climber, advanced
        # routes this user has already rated (to be excluded from prediction)
        iid_me = self.df20.loc[self.df20['user_id'] == user_id, 'route_id']
        iids_to_pred = np.setdiff1d(iid, iid_me)

        testset = [[user_id, iid, 2] for iid in iids_to_pred]
        predictions_tuned = algo_tuned.test(testset)
        pred_ratings_tuned = np.array([pred.est for pred in predictions_tuned])

        i_max = np.argpartition(pred_ratings_tuned, -20)[-20:]
        i_max = i_max[np.argsort(-pred_ratings_tuned[i_max])]
        iid = iids_to_pred[i_max]

        #top 20 recommended climbs
        self.df_top_climbs_mf = pd.DataFrame(iid, pred_ratings_tuned[i_max])
        self.df_top_climbs_mf = self.df_top_climbs_mf.reset_index()

        self.df_top_climbs_mf.columns = ['predicted rating', 'route id']
Example 14
def get_svd_recommender(df, test_size=0.25, path="", exists=False):
    """
    builds and trains an SVD recommender
    :param df: a dataframe containing user ID's, beer ID's and ratings
    :param test_size: the fraction of samples that should be reserved for testing
    :param path: the path to an existing svd recommender that was saved to a file
    :param exists: whether or not to upload the algo from a saved file
    :return: trained recommender, list of predictions, and the root mean square error of the recommender
    """
    if exists:
        return dump.load(path)[1]

    # allows surprise to read df
    reader = Reader(rating_scale=(1, 5))
    # must load in particular column order
    data = Dataset.load_from_df(df[['user_id', 'beer_id', 'user_score']],
                                reader)

    trainset, testset = train_test_split(data, test_size=test_size)
    algo = SVD()
    # Train the algorithm on the trainset
    algo.fit(trainset)
    # and predict ratings for the testset. test() returns a list of prediction objects
    # which have several attributes such as est (the prediction) and r_ui (the true rating)
    predictions = algo.test(testset)

    # rmse below 1 is considered low
    rmse = accuracy.rmse(predictions)
    mae = accuracy.mae(predictions)

    return algo, predictions, rmse
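A brief usage sketch for get_svd_recommender, assuming beer_df is a DataFrame with the three columns named above (the DataFrame and the user/beer ids are hypothetical):

# Train from scratch and inspect accuracy.
algo, predictions, rmse = get_svd_recommender(beer_df, test_size=0.2)
print(f"RMSE: {rmse:.3f}")

# Estimate a rating for one (user, beer) pair.
print(algo.predict("user_123", "beer_456").est)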
def svd4_user_movie_rate():
    """

    Returns
    -------返回训练好的基于user movie rating数据的svd模型,若没有训练好的模型存储,则触发模型训练&保存
    TYPE
        surprise.prediction_algorithms.algo_base.AlgoBase.
    TYPE
        surprise算法对象.

    """

    if os.path.exists(ALGO_RESULT_PATH + SVD_RESULT_USER2MOVIE):
        # dump.load returns (predictions, algo); take the algo
        return dump.load(ALGO_RESULT_PATH + SVD_RESULT_USER2MOVIE)[1]
    else:
        # read the data
        reader = Reader(line_format="user item rating")
        data = Dataset.load_from_df(dataprocess.create_user_movie_rate().
                                    loc[:, ["user", "movie", "rate"]],
                                    reader=reader)
        trainset = data.build_full_trainset()

        # set hyperparameters & initialize the model
        algo = SVD(n_epochs=N_EPOCHS_SVD, lr_all=LR_ALL_SVD, verbose=True)
        algo.fit(trainset)

        dump.dump(file_name=ALGO_RESULT_PATH + SVD_RESULT_USER2MOVIE,
                  algo=algo,
                  verbose=True)
        return algo
def knn_user_movie_rate():
    """

    Returns
    -------返回训练好的基于user movie rating数据的knnbaseline模型
    TYPE
        surprise.KNNbaseline object.

    """

    if os.path.exists(ALGO_RESULT_PATH + KNN_RESULT_USER2MOVIE):
        # dump.load returns (predictions, algo); take the algo
        return dump.load(ALGO_RESULT_PATH + KNN_RESULT_USER2MOVIE)[1]
    else:
        # read the data
        reader = Reader(line_format="user item rating", sep=",")
        data = Dataset.load_from_df(dataprocess.create_user_movie_rate().
                                    loc[:, ["user", "movie", "rate"]],
                                    reader=reader)
        trainset = data.build_full_trainset()

        # set hyperparameters & initialize the model
        sim_options = {'name': 'pearson', "user_based": False}
        algo = KNNBaseline(k=10, sim_options=sim_options)
        algo.fit(trainset)

        dump.dump(file_name=ALGO_RESULT_PATH + KNN_RESULT_USER2MOVIE,
                  algo=algo,
                  verbose=True)
        return algo
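Both helpers return a fitted Surprise algorithm, so a caller can request a single estimate directly. A minimal usage sketch (the user and movie ids are hypothetical):

algo = knn_user_movie_rate()
# est is the predicted rating for this (user, movie) pair.
est = algo.predict("user_1", "movie_42").est
print(est)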
Example 17
def predict_new_user(newUser, pathToPivotData, pathToModel):
    # load pivoted data
    dfPivot = pd.read_csv(pathToPivotData, index_col=0)
    
    # append the new user to the data (DataFrame.append was removed in pandas 2.0)
    dfPivot = pd.concat([dfPivot, pd.DataFrame(newUser, index=['-99'])])
    dfPivot = dfPivot.fillna(0)  # alternative: fill with dfPivot.mean(axis=0)
    
    # calculate distance to each existing user
    userDistance = {}
    for user in dfPivot.index:
        userDistance[user] = spatial.distance.euclidean(dfPivot.loc['-99'], dfPivot.loc[user])
    
    # get top n similar users (smallest distances), skipping the new user itself
    n = 20
    similarUsers = sorted(userDistance.items(), key=lambda x: x[1])[1:n + 1]
    similarUsersKeys = [key[0] for key in similarUsers]
    
    # load rs
    _, loaded_algo = dump.load(pathToModel)
    
    # get top movies for similar users
    preds = {}
    for user in similarUsersKeys:
        preds[user] = {}
        for movie in list(dfPivot):
            preds[user][movie] = loaded_algo.predict(uid=str(user), iid=str(movie)).est
    predsDf = pd.DataFrame.from_dict(preds)
    
    # get top movies from average from top movies for similar users
    # TODO: add distance as weighting
    recommendedMovies = predsDf.mean(axis=1).sort_values(ascending=False)[:20].to_dict()
    print(recommendedMovies)
    return recommendedMovies
Example 18
def test_dump_nothing():
    """Ensure that by default None objects are dumped."""
    with tempfile.NamedTemporaryFile() as tmp_file:
        dump.dump(tmp_file.name)
        predictions, algo = dump.load(tmp_file.name)
        assert predictions is None
        assert algo is None
Example 20
def retrieve():
    print("request received!")
    retrieve_request = request.get_json()
    userId = retrieve_request.get("userId")
    candidateIds = retrieve_request.get("candidateIds")
    excludeIds = retrieve_request.get("excludeIds")
    # offset = retrieve_request.get('offset')
    limit = retrieve_request.get("limit")
    # retrievalCriteria = retrieve_request.get('retrievalCriteria')

    # load data and model, get recommendations
    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()
    _, loaded_algo = dump.load(os.path.expanduser('./SVD_model_couchDB'))
    print("file loaded")
    predictions_loaded_algo = loaded_algo.test(trainset.build_testset())
    recs = get_all_recs(predictions_loaded_algo)[int(userId)]
    # recs = get_top_n(predictions_loaded_algo,int(limit))[int(userId)]
    response_list = {'source':{"id":"SVD_model"},'movieIds':[]}

    # Format recommendations
    for i in range(len(recs)):
        if len(response_list['movieIds']) >= int(limit):
            break
        if (recs[i][0] in candidateIds) and (recs[i][0] not in excludeIds):
            response_list['movieIds'].append(recs[i][0])
    return jsonify(response_list)
Example 21
def get_model():
    path = model_dump_path
    if os.path.isfile(path):
        _, loaded_algo = dump.load(path)
        return loaded_algo
    else:
        algo = train(path)
        return algo
Example 22
def recommend_collaborative_explicit():
    if request.method == 'GET':
        try:
            db = getDb()
            if (db):
                collaboratives = db.collaboratives
                customer_id = request.args.get('customer_id', default='')
                top = request.args.get('top', default='')
                user_id = request.args.get('user_id', default='')

                data = pd.DataFrame(
                    list(
                        collaboratives.find({
                            'customer': ObjectId(customer_id),
                            'explicit': True
                        })))
                data = data[['userId', 'itemId', 'feedBack']]
                data = data.rename(columns={
                    'userId': 'user',
                    'itemId': 'item',
                    'feedBack': 'rating'
                })
                # get the list of product ids
                iids = data['item'].unique()
                iids_user = data.loc[data['user'] == int(user_id), 'item']
                # remove the iids the user has already rated
                iids_to_pred = np.setdiff1d(iids, iids_user)
                testset = [[user_id, iid, 4.] for iid in iids_to_pred]
                _, loaded_algo = dump.load('models/' + customer_id +
                                           '_collaborative_explicit')

                predictions = loaded_algo.test(testset)
                pred_ratings = np.array([pred.est for pred in predictions])
                # i_max = pred_ratings.argmax()
                top_n = pred_ratings.argsort()[-int(top):][::-1]
                result = []

                for idx in top_n:
                    iid = iids_to_pred[idx]
                    result.append({
                        'itemId': iid,
                        'prediction_rating': pred_ratings[idx]
                    })
                return {
                    'data': {
                        'current_user': {
                            'id': user_id,
                        },
                        'suggestion': result,
                        'top': top
                    }
                }
            else:
                return "Database not found"
        except Exception as e:
            return "Error in " + str(e)
def SVD_alg():
    print('Using SVD')
    _, alg = dump.load('SVD')
    predictions = alg.test(testset)
    #pred = alg.predict(5,2)
    #print(pred)
    #print(predictions)
    print(accuracy.rmse(predictions))

    dump.dump('SVD_pred', predictions, alg)
    def get_serialize_algo(self, score_min=50):
        """
        Load the serialized algo.

        :param score_min:
        :return:
        """
        file_name = os.path.expanduser("./score" + str(score_min) + ".dump")
        _, algo = dump.load(file_name=file_name)
        return algo
Example 25
    def __init__(self, model_path):
        """Init RecSys
        Args:
            model_path (str): Model path
        """
        # load prediction and model from a given file
        self.predictions, self.model = dump.load(model_path)

        self.avg_recall = 0
        self.avg_precision = 0
Example 26
def get_oracle_labels_for_test_set(dataset_name, switch_ensemble):
    reader = Reader(line_format='user item rating timestamp', sep=',')
    train = Dataset.load_from_file("./created_data/" + dataset_name +
                                   "_train.csv",
                                   reader=reader)
    test_ensembles = Dataset.load_from_file("./created_data/" + dataset_name +
                                            "_test_ensembles.csv",
                                            reader=reader)

    uf = UserFeatures(
        pd.DataFrame(train.raw_ratings,
                     columns=["userId", "movieId", "rating", "timestamp"]),
        False)
    all_features_df = uf.get_all_user_features()

    recs_avg_errors = []
    for rs in RS:
        #Memory error for 16GB machine or float division error for lastfm
        if ("KNN" in rs["name"] and dataset_name in datasets_knn_mem_error):
            continue
        file_name = os.path.expanduser('./created_data/trained_RS/dump_file_' +
                                       dataset_name + '_' + rs["name"])
        _, loaded_algo = dump.load(file_name)

        predictions = loaded_algo.test(
            test_ensembles.build_full_trainset().build_testset())
        predictions_df = pd.DataFrame(
            predictions,
            columns=["userId", "movieId", "rating", "prediction", "details"])

        predictions_with_relevance = remove_dataset_bias(predictions_df,
                                                         has_ns=True)
        scores = predictions_with_relevance.groupby("userId").agg(
            lambda r, f=calculate_ndcg_score: f(r, "prediction"))
        scores = scores[[scores.columns[0]]].rename(index=str,
                                                    columns={
                                                        scores.columns[0]:
                                                        "NDCG"
                                                    }).reset_index()
        scores["RS"] = rs["name"]
        # this was used when mae was the criterion for creating the H1 dataset
        # predictions_df["error"] = abs(predictions_df["prediction"]-predictions_df["rating"])
        # avg_errors = predictions_df.groupby("userId")["error"].mean().rename("avg_error").to_frame().reset_index()
        # avg_errors["RS"] = rs["name"]

        recs_avg_errors.append(scores)

    all_avg_errors = pd.concat(recs_avg_errors).reset_index()
    assert not all_avg_errors.isnull().values.any()

    Xy = create_best_RS_userwise_dataset(all_avg_errors, all_features_df)
    if ("amazon" not in dataset_name):
        Xy["userId"] = Xy["userId"].astype(int)
    return Xy.sort_values("userId")[["userId", "label"]]
Example 27
    def load_recommender(self, recommender):
        '''
        Load pickled model from recommender directory

        Parameters
        ----------
        recommender: str
            - accepts 'knn' or 'svd' 
        '''
        if recommender == 'knn':
            _, model = load(RECOMMENDERS_DIR + 'knn_recommender.pickle')
        elif recommender == 'svd':
            _, model = load(RECOMMENDERS_DIR + 'mf_recommender.pickle')
        else:
            model = None

        self.model_type = recommender
        self.model = model

        self.set_data(self.model)
Example 28
def estimate():
    predictions_svd, algo_svd = dump.load('models/dump_SVD_test')
    precisions, recalls = precision_recall_at_k(predictions_svd, k=TOP_K, threshold=THRESHOLD)

    df_svd = pd.DataFrame(predictions_svd, columns=['uid', 'iid', 'rui', 'est', 'details'])
    df_svd['err'] = abs(df_svd.est - df_svd.rui)

    with open("estimation.txt", "w+") as f:
        f.write(f"SVD\n{df_svd.head()}\n")
        # Precision and recall can then be averaged over all users
        f.write(f"Precision: {sum(prec for prec in precisions.values()) / len(precisions)}\n")
        f.write(f"Recall: {sum(rec for rec in recalls.values()) / len(recalls)}\n")
Example 29
def load_model(path=None):
    '''Wrapper with logging. Initializes the main machine
    learning model.
    '''
    app.logger.debug('Loading model...')

    if path is None:
        path = settings.MODEL_PATH

    _, model = dump.load(path)
    app.logger.debug('Model has been loaded successfully')
    return model
Example 30
def init():
    # load the model from file into a global object
    global model

    # we assume that we have just one model
    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder
    # (./azureml-models/$MODEL_NAME/$VERSION)
    model_path = Model.get_model_path(
        os.getenv("AZUREML_MODEL_DIR").split('/')[-2])

    # model = joblib.load(model_path)
    model = dump.load(model_path)
    model = model[1]
Example 31
def train_SVD():
    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()
    algo = SVD()
    algo.fit(trainset)

    # Dump algorithm and reload it.
    file_name = os.path.expanduser('./SVD_model')
    dump.dump(file_name, algo=algo)
    print("file dumped")

    # Load a model:
    _, loaded_algo = dump.load('./SVD_model')
    print("file loaded")
    predictions_loaded_algo = loaded_algo.test(trainset.build_testset())
Example 32
def test_dump(u1_ml100k):
    """Train an algorithm, compute its predictions then dump them.
    Ensure that the predictions that are loaded back are the correct ones, and
    that the predictions of the dumped algorithm are also equal to the other
    ones."""

    random.seed(0)

    trainset, testset = next(PredefinedKFold().split(u1_ml100k))

    algo = BaselineOnly()
    algo.fit(trainset)
    predictions = algo.test(testset)

    with tempfile.NamedTemporaryFile() as tmp_file:
        dump.dump(tmp_file.name, predictions, algo)
        predictions_dumped, algo_dumped = dump.load(tmp_file.name)

        predictions_algo_dumped = algo_dumped.test(testset)
        assert predictions == predictions_dumped
        assert predictions == predictions_algo_dumped
Example 33
"""This module shows how an algorithm can be serialized with the dump module:
the SVD algorithm is trained on a dataset, dumped to a file, and is
then reloaded and can be used again for making predictions.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
import os

from surprise import SVD
from surprise import Dataset
from surprise import dump


data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()

algo = SVD()
algo.fit(trainset)

# Compute predictions of the 'original' algorithm.
predictions = algo.test(trainset.build_testset())

# Dump algorithm and reload it.
file_name = os.path.expanduser('~/dump_file')
dump.dump(file_name, algo=algo)
_, loaded_algo = dump.load(file_name)

# We now ensure that the algo is still the same by checking the predictions.
predictions_loaded_algo = loaded_algo.test(trainset.build_testset())
assert predictions == predictions_loaded_algo
print('Predictions are the same')
Example 34
    def load(cls, filename):
        _, algo = load(filename)
        return cls(algo)
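For context, a self-contained sketch of how such a wrapper class might pair the load classmethod with a matching save method (the class name AlgoWrapper and the algo attribute are assumptions, not taken from the example):

from surprise import dump


class AlgoWrapper:
    def __init__(self, algo):
        self.algo = algo

    @classmethod
    def load(cls, filename):
        # dump.load returns (predictions, algo); only the algo is kept.
        _, algo = dump.load(filename)
        return cls(algo)

    def save(self, filename):
        # Persist only the wrapped algorithm; load() restores it.
        dump.dump(filename, algo=self.algo)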