# 예제 #1 (Example #1)
# Example: fitting MultiCategoricalEmbedding with and without a numeric
# target FeatureSet.
import os
import sys
from os import path

from sklearn.preprocessing import MinMaxScaler

# Make the package root and the current working directory importable
# before importing hwer.
sys.path.append(path.join(path.dirname(__file__), '../'))
sys.path.insert(0, "../")
sys.path.append(os.getcwd())

import numpy as np
from hwer import MultiCategoricalEmbedding, FeatureSet, Feature, FeatureType

# Multi-categorical feature: each sample carries a *set* of category tags.
f1 = Feature(
    "f1", FeatureType.MULTI_CATEGORICAL,
    [["a", "b"], ["b"], ["c", "b"], ["a"], ["a", "c"], ["a", "b"], ["b"]])
# Numeric feature used as the supervised target below.
f2 = Feature("f2", FeatureType.NUMERIC, [1.0, 3.0, 2.0, 4.0, 5.0, 6.0, 2.0])

fs = FeatureSet([f2])

# Supervised fit: the numeric target FeatureSet guides the embedding.
cs = MultiCategoricalEmbedding(4, True)
p = cs.fit_transform(f1, target=fs)
print(p)

# Unsupervised fit: without target
cs = MultiCategoricalEmbedding(4, True)
p = cs.fit_transform(f1)
print(p)
# Example: fitting CategoricalEmbedding with and without a numeric
# target FeatureSet.
import os
import sys
from os import path

from sklearn.preprocessing import MinMaxScaler

# Make the package root and the current working directory importable
# before importing hwer.
sys.path.append(path.join(path.dirname(__file__), '../'))
sys.path.insert(0, "../")
sys.path.append(os.getcwd())

import numpy as np
from hwer import CategoricalEmbedding, FeatureSet, Feature, FeatureType

# Plain categorical feature and a numeric feature used as target.
f1 = Feature("f1", FeatureType.CATEGORICAL, ["a", "b", "c", "b", "c", "c", "a"])
f2 = Feature("f2", FeatureType.NUMERIC, [1.0, 3.0, 2.0, 4.0, 5.0, 6.0, 2.0])

fs = FeatureSet([f2])

# Supervised fit: the numeric target FeatureSet guides the embedding.
cs = CategoricalEmbedding(4, True)

p = cs.fit_transform(f1, target=fs)
print(p)

# Unsupervised fit: no target.
cs = CategoricalEmbedding(4, True)

p = cs.fit_transform(f1)
print(p)


# 예제 #3 (Example #3)
def test_once(train_affinities, validation_affinities, items,
              capabilities=("svdpp", "resnet", "content", "triplet", "implicit")):
    """Train one HybridRecommenderSVDpp configuration and report its metrics.

    Parameters
    ----------
    train_affinities : affinity triples used for fitting.
    validation_affinities : affinity triples used for held-out evaluation.
    items : item collection forwarded to ``extraction_efficiency``.
    capabilities : components to enable; any subset of
        {"svdpp", "resnet", "content", "triplet", "implicit", "dnn"}.
        The default is a tuple (not a list) to avoid the shared
        mutable-default-argument pitfall; behavior is unchanged because
        the value is only read via ``in`` tests and ``"_".join``.

    Returns
    -------
    tuple
        ``(recsys, results, predictions, actuals)`` where ``results`` is a
        one-element list of metric dicts for this configuration.

    NOTE(review): relies on module-level names defined elsewhere in the file
    (users, movies, hyperparameters, kfold_multiplier, enable_error_analysis,
    get_prediction_details, extraction_efficiency, model_get_topk,
    error_analysis) — confirm they are in scope at call time.
    """
    # Per-feature embedders for user and item side information.
    embedding_mapper = {}
    embedding_mapper['gender'] = CategoricalEmbedding(n_dims=2)
    embedding_mapper['age'] = CategoricalEmbedding(n_dims=2)
    embedding_mapper['occupation'] = CategoricalEmbedding(n_dims=4*kfold_multiplier)
    embedding_mapper['zip'] = CategoricalEmbedding(n_dims=2*kfold_multiplier)

    embedding_mapper['text'] = FlairGlove100AndBytePairEmbedding()
    embedding_mapper['numeric'] = NumericEmbedding(4*kfold_multiplier)
    embedding_mapper['genres'] = MultiCategoricalEmbedding(n_dims=4*kfold_multiplier)

    # User-side features (all categorical; non-string columns cast to str).
    u1 = Feature(feature_name="gender", feature_type=FeatureType.CATEGORICAL, values=users.gender.values)
    u2 = Feature(feature_name="age", feature_type=FeatureType.CATEGORICAL, values=users.age.astype(str).values)
    u3 = Feature(feature_name="occupation", feature_type=FeatureType.CATEGORICAL,
                 values=users.occupation.astype(str).values)
    u4 = Feature(feature_name="zip", feature_type=FeatureType.CATEGORICAL, values=users.zip.astype(str).values)
    user_data = FeatureSet([u1, u2, u3, u4])

    # Item-side features: free text, multi-label genres, numeric columns.
    i1 = Feature(feature_name="text", feature_type=FeatureType.STR, values=movies.text.values)
    i2 = Feature(feature_name="genres", feature_type=FeatureType.MULTI_CATEGORICAL, values=movies.genres.values)
    i3 = Feature(feature_name="numeric", feature_type=FeatureType.NUMERIC,
                 values=movies[["title_length", "overview_length", "runtime"]].values)
    item_data = FeatureSet([i1, i2, i3])

    # Start from a deep copy of the shared hyperparameters with every
    # optional component switched off, then enable only the requested ones.
    kwargs = {}
    kwargs['user_data'] = user_data
    kwargs['item_data'] = item_data
    kwargs["hyperparameters"] = copy.deepcopy(hyperparameters)
    collaborative_params = kwargs["hyperparameters"]['collaborative_params']
    # Alias for the deeply-nested dict mutated repeatedly below.
    prediction_params = collaborative_params["prediction_network_params"]
    prediction_params["use_svd"] = False
    prediction_params["use_resnet"] = False
    prediction_params["resnet_content_each_layer"] = False
    collaborative_params["use_triplet"] = False
    prediction_params["use_implicit"] = False
    if "svdpp" in capabilities:
        prediction_params["use_svd"] = True
    if "resnet" in capabilities:
        # Content features are injected through the resnet path, so
        # "content" also forces resnet on.
        prediction_params["resnet_content_each_layer"] = True if "content" in capabilities else prediction_params["resnet_content_each_layer"]
        prediction_params["use_resnet"] = True
    if "content" in capabilities:
        prediction_params["resnet_content_each_layer"] = True
        prediction_params["use_resnet"] = True
    if "triplet" in capabilities:
        collaborative_params["use_triplet"] = True
    if "implicit" in capabilities:
        prediction_params["use_implicit"] = True
    if "dnn" in capabilities or "resnet" in capabilities:
        prediction_params["use_dnn"] = True

    recsys = HybridRecommenderSVDpp(embedding_mapper=embedding_mapper, knn_params=None, rating_scale=(1, 5),
                                    n_content_dims=32 * kfold_multiplier,
                                    n_collaborative_dims=32 * kfold_multiplier)

    start = time.time()
    user_vectors, item_vectors = recsys.fit(users.user_id.values, movies.movie_id.values,
                                            train_affinities, **kwargs)
    end = time.time()
    total_time = end - start

    # Validation error, plus training-set error for overfit diagnosis.
    predictions, actuals, rmse, mae = get_prediction_details(recsys, validation_affinities)
    _, _, train_rmse, train_mae = get_prediction_details(recsys, train_affinities)
    print("hybrid-" + "_".join(capabilities), ": ", rmse, mae, train_rmse, train_mae)
    ex_ee = extraction_efficiency(recsys, train_affinities, validation_affinities, model_get_topk, items)
    if enable_error_analysis:
        error_df = pd.DataFrame({"errors": actuals - predictions, "actuals": actuals, "predictions": predictions})
        error_analysis(error_df, "Hybrid")
    results = [{"algo": "hybrid-" + "_".join(capabilities), "rmse": rmse, "mae": mae,
                "map": ex_ee["map"], "retrieval_time": ex_ee["retrieval_time"],
                "train_rmse": train_rmse, "train_mae": train_mae, "time": total_time}]
    return recsys, results, predictions, actuals
# 예제 #4 (Example #4)
# Example: a ContentRecommendation model over user/item side features.
# NOTE(review): embedding_mapper, users, movies and user_item_affinities are
# presumably defined earlier in the original script — not visible here.

# One embedder per side-information feature.
embedding_mapper['gender'] = CategoricalEmbedding(n_dims=2)
embedding_mapper['age'] = CategoricalEmbedding(n_dims=2)
embedding_mapper['occupation'] = CategoricalEmbedding(n_dims=2)
embedding_mapper['zip'] = CategoricalEmbedding(n_dims=8)

embedding_mapper['title'] = FlairGlove100AndBytePairEmbedding()
embedding_mapper['genres'] = MultiCategoricalEmbedding(n_dims=16)

recsys = ContentRecommendation(embedding_mapper=embedding_mapper, knn_params=None, n_output_dims=64)

kwargs = {'user_item_affinities': user_item_affinities}

# User-side categorical features; non-string columns cast to str first.
u1 = Feature(feature_name="gender", feature_type=FeatureType.CATEGORICAL, values=users.gender.values)
u2 = Feature(feature_name="age", feature_type=FeatureType.CATEGORICAL, values=users.age.astype(str).values)
u3 = Feature(feature_name="occupation", feature_type=FeatureType.CATEGORICAL, values=users.occupation.astype(str).values)
u4 = Feature(feature_name="zip", feature_type=FeatureType.CATEGORICAL, values=users.zip.astype(str).values)
user_data = FeatureSet([u1, u2, u3, u4])

# Item-side text feature.
i1 = Feature(feature_name="title", feature_type=FeatureType.STR, values=movies.title.values)
# Example: FasttextEmbedding over gensim's common_texts corpus.
# Imports hoisted ahead of first use (the original used `sys`/`os` before
# importing them within this snippet).
import os
import sys
from os import path

from gensim.test.utils import common_texts
from sklearn.preprocessing import MinMaxScaler

import numpy as np

# Make the package root and the current working directory importable
# before importing hwer.
sys.path.append(path.join(path.dirname(__file__), '../'))
sys.path.insert(0, "../")
sys.path.append(os.getcwd())

from hwer import FasttextEmbedding, Feature, FeatureType

# 32-dim embedding backed by a pre-trained fasttext binary.
# NOTE(review): hard-coded local path — adjust for your machine.
ft = FasttextEmbedding(
    32,
    fasttext_file=
    "/Users/ahemf/mygit/Hybrid-Weighted-Embedding-Recommender/hwer/fasttext.bin"
)

# common_texts is a list of token lists; join each into one string.
text = list(map(lambda x: " ".join(x), common_texts))
f1 = Feature("text", FeatureType.STR, text)
print(ft.fit_transform(f1))

print("=" * 40)
# 4-dim embedding without a pre-trained file.
ft = FasttextEmbedding(4)

f1 = Feature("text", FeatureType.STR, text)
print(ft.fit_transform(f1))

print(ft.fit_transform(f1).shape)
# Build one synthetic embedding per user (the mean of that user's block of
# item vectors in i1_15) and a flat (user, item, rating) affinity list with
# a constant rating of 3.
# NOTE(review): user_ids, item_ids, i1_15 and items_per_user come from
# earlier in the original script — not visible here.
user_item_affinities = []
user_embeddings = []
for idx, user in enumerate(user_ids):
    lo = idx * items_per_user
    hi = lo + items_per_user
    user_embeddings.append(np.average(i1_15[lo:hi], axis=0))
    user_item_affinities.extend((user, item_ids[j], 3) for j in range(lo, hi))


user_embeddings = np.vstack(user_embeddings)
# Users first, then items — the reference layout for comparison.
actual_embeddings = np.concatenate((user_embeddings, i1_15))

# Items carry a single numeric feature derived from i1_15.
embedding_mapper = {}
embedding_mapper['numeric'] = NumericEmbedding(n_dims=3)

f = Feature("numeric", FeatureType.NUMERIC, i1_15)
item_data = FeatureSet([f])

kwargs = {'item_data': item_data}

recsys = ContentRecommendation(embedding_mapper=embedding_mapper, knn_params=None, n_output_dims=2, rating_scale=(1, 5))
_ = recsys.fit(user_ids, item_ids, user_item_affinities, **kwargs)

# Every (entity, type) pair: users followed by items.
all_entities = list(zip(user_ids, [EntityType.USER] * len(user_ids))) + \
               list(zip(item_ids, [EntityType.ITEM] * len(item_ids)))

embeddings = recsys.get_embeddings(all_entities)

# Same entities with an extra third column (5 for users, 1 for items).
all_entities = list(zip(user_ids, [EntityType.USER] * len(user_ids), [5] * len(user_ids))) + \
               list(zip(item_ids, [EntityType.ITEM] * len(item_ids), [1] * len(item_ids)))
# 예제 #7 (Example #7)
# Example: FlairGlove100Embedding on a single out-of-vocabulary string.
# Imports hoisted ahead of first use (the original used `sys` and `os`
# before importing them within this snippet).
import os
import sys
from os import path

import numpy as np
from gensim.test.utils import common_texts

# Make the package root and the current working directory importable
# before importing hwer.
sys.path.append(path.join(path.dirname(__file__), '../'))
sys.path.insert(0, "../")
sys.path.append(os.getcwd())

from hwer import FlairGlove100AndBytePairEmbedding, FlairGlove100Embedding, Feature, FeatureType

# (kept for reference) fuller demo over gensim's common_texts:
# text = list(map(lambda x: " ".join(x), common_texts))
#
# f1 = Feature("text", FeatureType.STR, text)
#
# flair1 = FlairGlove100Embedding()
# print(flair1.fit_transform(f1))
#
# flair2 = FlairGlove100AndBytePairEmbedding()
# print(flair2.fit_transform(f1))
#
# print(flair1.fit_transform(f1).shape)
# print(flair2.fit_transform(f1).shape)


# Single nonsense token — presumably exercises the embedder's behavior on
# out-of-vocabulary input; TODO confirm against hwer's docs.
f1 = Feature("text", FeatureType.STR, ["eifjcchchbnikfncbcntnhbvthnrbjiechcrbinucknb"])
flair1 = FlairGlove100Embedding()
print(flair1.fit_transform(f1))
# Example: NumericEmbedding over single- and two-column numeric features.
import os
import sys
from os import path

from sklearn.preprocessing import MinMaxScaler

# Make the package root and the current working directory importable
# before importing hwer.
sys.path.append(path.join(path.dirname(__file__), '../'))

sys.path.insert(0, "../")

sys.path.append(os.getcwd())

import numpy as np
from hwer import CategoricalEmbedding, FeatureSet, Feature, NumericEmbedding, FeatureType

f1 = Feature("f1", FeatureType.NUMERIC,
             [1.2, 0.1, 2.2, 4.1, 5.0, 6.1, 2.1, 5.0])
f2 = Feature("f2", FeatureType.NUMERIC,
             [0.7, 3.0, 2.0, 4.0, 5.0, 6.0, 7.0, 5.0])
# Pair the two series element-wise into one two-column numeric feature.
new_vals = list(zip(f1.values, f2.values))
print(new_vals)

f = Feature("f1", FeatureType.NUMERIC, new_vals)

# 4-dim embedding of the two-column feature.
ns = NumericEmbedding(4)
print(ns.fit_transform(f))

# 4-dim embedding of the single-column feature.
ns = NumericEmbedding(4)
print(ns.fit_transform(f1))