"""Demo: fit a MultiCategoricalEmbedding with and without a numeric target."""
import os
import sys
from os import path

# Make the package root importable when running this script directly.
# (Original had a duplicate `import sys`; removed.)
sys.path.append(path.join(path.dirname(__file__), '../'))
sys.path.insert(0, "../")
sys.path.append(os.getcwd())

import numpy as np
from sklearn.preprocessing import MinMaxScaler

from hwer import MultiCategoricalEmbedding, FeatureSet, Feature, FeatureType

# Multi-valued categorical feature: each row is a list of category tags.
f1 = Feature(
    "f1", FeatureType.MULTI_CATEGORICAL,
    [["a", "b"], ["b"], ["c", "b"], ["a"], ["a", "c"], ["a", "b"], ["b"]])
# Numeric feature used as the supervision target for the embedding fit.
f2 = Feature("f2", FeatureType.NUMERIC, [1.0, 3.0, 2.0, 4.0, 5.0, 6.0, 2.0])
fs = FeatureSet([f2])

# Fit a 4-dimensional embedding with the numeric target.
cs = MultiCategoricalEmbedding(4, True)
p = cs.fit_transform(f1, target=fs)
print(p)

# without target
cs = MultiCategoricalEmbedding(4, True)
p = cs.fit_transform(f1)
print(p)
"""Demo: fit a CategoricalEmbedding with and without a numeric target."""
import os
import sys
from os import path

# Make the package root importable when running this script directly.
# (Original had a duplicate `import sys`; removed.)
sys.path.append(path.join(path.dirname(__file__), '../'))
sys.path.insert(0, "../")
sys.path.append(os.getcwd())

import numpy as np
from sklearn.preprocessing import MinMaxScaler

from hwer import CategoricalEmbedding, FeatureSet, Feature, FeatureType

# Single-valued categorical feature to embed.
f1 = Feature("f1", FeatureType.CATEGORICAL, ["a", "b", "c", "b", "c", "c", "a"])
# Numeric feature used as the supervision target for the embedding fit.
f2 = Feature("f2", FeatureType.NUMERIC, [1.0, 3.0, 2.0, 4.0, 5.0, 6.0, 2.0])
fs = FeatureSet([f2])

# Fit a 4-dimensional embedding with the numeric target.
cs = CategoricalEmbedding(4, True)
p = cs.fit_transform(f1, target=fs)
print(p)

# Fit again without a target.
cs = CategoricalEmbedding(4, True)
p = cs.fit_transform(f1)
print(p)
def test_once(train_affinities, validation_affinities, items,
              capabilities=None):
    """Train one HybridRecommenderSVDpp configuration and evaluate it.

    Builds user/item side-information embeddings, toggles the collaborative
    hyperparameter flags according to *capabilities*, fits the recommender on
    *train_affinities*, then reports train/validation RMSE and MAE plus
    retrieval metrics.

    Relies on module-level globals: `kfold_multiplier`, `users`, `movies`,
    `hyperparameters`, `get_prediction_details`, `extraction_efficiency`,
    `model_get_topk`, `enable_error_analysis`, `error_analysis`.

    Parameters
    ----------
    train_affinities : training (user, item, rating) interactions.
    validation_affinities : held-out interactions for evaluation.
    items : item ids used for top-k retrieval evaluation.
    capabilities : list of capability switches; defaults to
        ["svdpp", "resnet", "content", "triplet", "implicit"].

    Returns
    -------
    tuple
        (recsys, results, predictions, actuals) where results is a
        one-element list of metric dicts.
    """
    # Mutable-default fix: the original used a list literal in the signature.
    if capabilities is None:
        capabilities = ["svdpp", "resnet", "content", "triplet", "implicit"]

    # One embedding per side-information column.
    embedding_mapper = {}
    embedding_mapper['gender'] = CategoricalEmbedding(n_dims=2)
    embedding_mapper['age'] = CategoricalEmbedding(n_dims=2)
    embedding_mapper['occupation'] = CategoricalEmbedding(n_dims=4 * kfold_multiplier)
    embedding_mapper['zip'] = CategoricalEmbedding(n_dims=2 * kfold_multiplier)
    embedding_mapper['text'] = FlairGlove100AndBytePairEmbedding()
    embedding_mapper['numeric'] = NumericEmbedding(4 * kfold_multiplier)
    embedding_mapper['genres'] = MultiCategoricalEmbedding(n_dims=4 * kfold_multiplier)

    # User features (age/occupation/zip cast to str so they act as categories).
    u1 = Feature(feature_name="gender", feature_type=FeatureType.CATEGORICAL,
                 values=users.gender.values)
    u2 = Feature(feature_name="age", feature_type=FeatureType.CATEGORICAL,
                 values=users.age.astype(str).values)
    u3 = Feature(feature_name="occupation", feature_type=FeatureType.CATEGORICAL,
                 values=users.occupation.astype(str).values)
    u4 = Feature(feature_name="zip", feature_type=FeatureType.CATEGORICAL,
                 values=users.zip.astype(str).values)
    user_data = FeatureSet([u1, u2, u3, u4])

    # Item features: free text, multi-valued genres, and numeric columns.
    i1 = Feature(feature_name="text", feature_type=FeatureType.STR,
                 values=movies.text.values)
    i2 = Feature(feature_name="genres", feature_type=FeatureType.MULTI_CATEGORICAL,
                 values=movies.genres.values)
    i3 = Feature(feature_name="numeric", feature_type=FeatureType.NUMERIC,
                 values=movies[["title_length", "overview_length", "runtime"]].values)
    item_data = FeatureSet([i1, i2, i3])

    kwargs = {}
    kwargs['user_data'] = user_data
    kwargs['item_data'] = item_data
    # Deep-copy so per-run flag toggling never leaks into the global config.
    kwargs["hyperparameters"] = copy.deepcopy(hyperparameters)
    collab_params = kwargs["hyperparameters"]['collaborative_params']
    # Alias: same nested dict object, so writes below mutate kwargs in place.
    prediction_params = collab_params["prediction_network_params"]

    # Disable everything first, then enable only the requested capabilities.
    prediction_params["use_svd"] = False
    prediction_params["use_resnet"] = False
    prediction_params["resnet_content_each_layer"] = False
    collab_params["use_triplet"] = False
    prediction_params["use_implicit"] = False
    if "svdpp" in capabilities:
        prediction_params["use_svd"] = True
    if "resnet" in capabilities:
        prediction_params["use_resnet"] = True
    if "content" in capabilities:
        # Content implies resnet with content injected at each layer.
        prediction_params["resnet_content_each_layer"] = True
        prediction_params["use_resnet"] = True
    if "triplet" in capabilities:
        collab_params["use_triplet"] = True
    if "implicit" in capabilities:
        prediction_params["use_implicit"] = True
    if "dnn" in capabilities or "resnet" in capabilities:
        # NOTE(review): "use_dnn" is never reset to False above — presumably
        # the global default already disables it; verify in `hyperparameters`.
        prediction_params["use_dnn"] = True

    recsys = HybridRecommenderSVDpp(embedding_mapper=embedding_mapper, knn_params=None,
                                    rating_scale=(1, 5),
                                    n_content_dims=32 * kfold_multiplier,
                                    n_collaborative_dims=32 * kfold_multiplier)
    start = time.time()
    user_vectors, item_vectors = recsys.fit(users.user_id.values, movies.movie_id.values,
                                            train_affinities, **kwargs)
    total_time = time.time() - start

    predictions, actuals, rmse, mae = get_prediction_details(recsys, validation_affinities)
    _, _, train_rmse, train_mae = get_prediction_details(recsys, train_affinities)
    print("hybrid-" + "_".join(capabilities), ": ", rmse, mae, train_rmse, train_mae)
    ex_ee = extraction_efficiency(recsys, train_affinities, validation_affinities,
                                  model_get_topk, items)
    if enable_error_analysis:
        error_df = pd.DataFrame({"errors": actuals - predictions,
                                 "actuals": actuals,
                                 "predictions": predictions})
        error_analysis(error_df, "Hybrid")
    results = [{"algo": "hybrid-" + "_".join(capabilities),
                "rmse": rmse, "mae": mae,
                "map": ex_ee["map"], "retrieval_time": ex_ee["retrieval_time"],
                "train_rmse": train_rmse, "train_mae": train_mae,
                "time": total_time}]
    return recsys, results, predictions, actuals
# Chunk of a larger script: `embedding_mapper`, `users`, `movies`,
# `user_item_affinities` and the hwer classes are defined/imported above
# this view — TODO confirm against the full file.
# One embedding per side-information column.
embedding_mapper['gender'] = CategoricalEmbedding(n_dims=2)
embedding_mapper['age'] = CategoricalEmbedding(n_dims=2)
embedding_mapper['occupation'] = CategoricalEmbedding(n_dims=2)
embedding_mapper['zip'] = CategoricalEmbedding(n_dims=8)
embedding_mapper['title'] = FlairGlove100AndBytePairEmbedding()
embedding_mapper['genres'] = MultiCategoricalEmbedding(n_dims=16)
# Content-based recommender over the mapped features, 64-dim output vectors.
recsys = ContentRecommendation(embedding_mapper=embedding_mapper, knn_params=None,
                               n_output_dims=64)
kwargs = {'user_item_affinities': user_item_affinities}
# User features (age/occupation/zip cast to str so they act as categories).
u1 = Feature(feature_name="gender", feature_type=FeatureType.CATEGORICAL,
             values=users.gender.values)
u2 = Feature(feature_name="age", feature_type=FeatureType.CATEGORICAL,
             values=users.age.astype(str).values)
u3 = Feature(feature_name="occupation", feature_type=FeatureType.CATEGORICAL,
             values=users.occupation.astype(str).values)
u4 = Feature(feature_name="zip", feature_type=FeatureType.CATEGORICAL,
             values=users.zip.astype(str).values)
user_data = FeatureSet([u1, u2, u3, u4])
# Item-side text feature (movie title).
i1 = Feature(feature_name="title", feature_type=FeatureType.STR,
             values=movies.title.values)
# Chunk of a demo script for hwer's FasttextEmbedding.
# NOTE(review): `sys` and `os` are used before the visible `import sys` below —
# presumably imported on an earlier line not shown here; verify.
from os import path
from sklearn.preprocessing import MinMaxScaler
sys.path.append(path.join(path.dirname(__file__), '../'))
sys.path.insert(0, "../")
import sys
sys.path.append(os.getcwd())
import numpy as np
from gensim.test.utils import common_texts
from hwer import FasttextEmbedding, Feature, FeatureType

# 32-dim embedding backed by a pre-trained fasttext model file.
# NOTE(review): hard-coded absolute path — will only work on the author's machine.
ft = FasttextEmbedding(
    32,
    fasttext_file=
    "/Users/ahemf/mygit/Hybrid-Weighted-Embedding-Recommender/hwer/fasttext.bin"
)
# gensim's common_texts is a list of token lists; join into sentences.
text = list(map(lambda x: " ".join(x), common_texts))
f1 = Feature("text", FeatureType.STR, text)
print(ft.fit_transform(f1))
print("=" * 40)
# 4-dim embedding with no pre-trained file (default model).
ft = FasttextEmbedding(4, )
f1 = Feature("text", FeatureType.STR, text)
print(ft.fit_transform(f1))
print(ft.fit_transform(f1).shape)
# Chunk of a larger script: `user_ids`, `item_ids`, `items_per_user`, `i1_15`
# and the hwer classes are defined above this view — TODO confirm.
# Build synthetic (user, item, rating) triples: each user is assigned a
# contiguous slice of `items_per_user` items, all rated 3.
user_item_affinities = []
user_embeddings = []
for i, user in enumerate(user_ids):
    # User embedding = mean of that user's item feature rows.
    # NOTE(review): assumes i1_15 is a 2-D array indexable by item position.
    user_embeddings.append(np.average(i1_15[i*items_per_user:(i+1)*items_per_user],
                                      axis=0,))
    for j in range(i*items_per_user, (i+1)*items_per_user):
        user_item_affinities.append((user, item_ids[j], 3))
user_embeddings = np.vstack(user_embeddings)
# Reference embeddings: users stacked on top of items.
actual_embeddings = np.concatenate((user_embeddings, i1_15))

embedding_mapper = {}
embedding_mapper['numeric'] = NumericEmbedding(n_dims=3)
f = Feature("numeric", FeatureType.NUMERIC, i1_15)
item_data = FeatureSet([f])
kwargs = {'item_data': item_data}
recsys = ContentRecommendation(embedding_mapper=embedding_mapper, knn_params=None,
                               n_output_dims=2, rating_scale=(1, 5))
_ = recsys.fit(user_ids, item_ids, user_item_affinities, **kwargs)

# Fetch embeddings for all users then all items.
all_entities = list(zip(user_ids, [EntityType.USER]*len(user_ids))) +\
               list(zip(item_ids, [EntityType.ITEM]*len(item_ids)))
embeddings = recsys.get_embeddings(all_entities)
# Same entity list with a third element per tuple (5 for users, 1 for items)
# — presumably a rating/weight argument for a later call; verify downstream.
all_entities = list(zip(user_ids, [EntityType.USER]*len(user_ids), [5]*len(user_ids))) +\
               list(zip(item_ids, [EntityType.ITEM]*len(item_ids), [1]*len(item_ids)))
# Chunk of a demo script for hwer's Flair-based text embeddings.
# NOTE(review): `sys`, `os` and `path` are used before the visible `import sys`
# below — presumably imported on an earlier line not shown here; verify.
sys.path.append(path.join(path.dirname(__file__), '../'))
sys.path.insert(0, "../")
import sys
sys.path.append(os.getcwd())
import numpy as np
from gensim.test.utils import common_texts
from hwer import FlairGlove100AndBytePairEmbedding, FlairGlove100Embedding, Feature, FeatureType

# Earlier experiment over gensim's common_texts, kept for reference:
# text = list(map(lambda x: " ".join(x), common_texts))
#
# f1 = Feature("text", FeatureType.STR, text)
#
# flair1 = FlairGlove100Embedding()
# print(flair1.fit_transform(f1))
#
# flair2 = FlairGlove100AndBytePairEmbedding()
# print(flair2.fit_transform(f1))
#
# print(flair1.fit_transform(f1).shape)
# print(flair2.fit_transform(f1).shape)
#

# Out-of-vocabulary stress test: a single nonsense token.
f1 = Feature("text", FeatureType.STR, ["eifjcchchbnikfncbcntnhbvthnrbjiechcrbinucknb"])
flair1 = FlairGlove100Embedding()
print(flair1.fit_transform(f1))
"""Demo: fit a NumericEmbedding on paired and single numeric features."""
import os
import sys
from os import path

# Make the package root importable when running this script directly.
# (Original had a duplicate `import sys`; removed.)
sys.path.append(path.join(path.dirname(__file__), '../'))
sys.path.insert(0, "../")
sys.path.append(os.getcwd())

import numpy as np
from sklearn.preprocessing import MinMaxScaler

from hwer import CategoricalEmbedding, FeatureSet, Feature, NumericEmbedding, FeatureType

f1 = Feature("f1", FeatureType.NUMERIC, [1.2, 0.1, 2.2, 4.1, 5.0, 6.1, 2.1, 5.0])
f2 = Feature("f2", FeatureType.NUMERIC, [0.7, 3.0, 2.0, 4.0, 5.0, 6.0, 7.0, 5.0])

# Pair the two columns row-wise into a 2-D numeric feature.
new_vals = list(zip(f1.values, f2.values))
print(new_vals)
f = Feature("f1", FeatureType.NUMERIC, new_vals)

# Fit a 4-dimensional embedding on the paired feature.
ns = NumericEmbedding(4)
print(ns.fit_transform(f))

# Fit a fresh embedding on the single-column feature.
ns = NumericEmbedding(4)
print(ns.fit_transform(f1))