def test_input_missing():
    """HyperGraph must raise ValueError when a feeding input is not declared.

    Two inputs feed the concatenated output, but only the first one is
    handed to ``HyperGraph``.
    """
    declared_input = Input()
    undeclared_input = Input()
    branches = [
        MLPInteraction()(declared_input),
        MLPInteraction()(undeclared_input),
    ]
    merged = ConcatenateInteraction()(branches)
    prediction = RatingPredictionOptimizer()(merged)

    with pytest.raises(ValueError) as excinfo:
        graph_module.HyperGraph(declared_input, prediction)

    message = str(excinfo.value)
    assert 'A required input is missing for HyperModel' in message
def test_input_output_disconnect():
    """HyperGraph must raise ValueError when input and output are unrelated.

    The input passed to ``HyperGraph`` feeds only a branch whose result is
    never used; the output is derived from a different input node.
    """
    orphan_input = Input()
    # Keep a reference to the branch result, as the original did.
    _unused_branch = MLPInteraction()(orphan_input)

    connected_input = Input()
    hidden = MLPInteraction()(connected_input)
    prediction = RatingPredictionOptimizer()(hidden)

    with pytest.raises(ValueError) as excinfo:
        graph_module.HyperGraph(orphan_input, prediction)

    message = str(excinfo.value)
    assert 'Inputs and outputs not connected.' in message
def test_graph_basics():
    """Check the shapes of the model built from a minimal PlainGraph.

    The graph is Input(shape=(30,)) -> MLPInteraction ->
    RatingPredictionOptimizer.
    """
    source = Input(shape=(30, ))
    hidden = MLPInteraction()(source)
    prediction = RatingPredictionOptimizer()(hidden)

    plain_graph = graph_module.PlainGraph(source, prediction)
    keras_graph = plain_graph.build_keras_graph()
    model = keras_graph.build(hp_module.HyperParameters())

    assert model.input_shape == (None, 30)
    assert model.output_shape == (None, )
def build_mlp(user_num, item_num):
    """Build a rating-prediction recommender with an MLP interaction.

    Feature column 0 feeds the user mapper and feature column 1 feeds the
    item mapper; both embeddings go through ``MLPInteraction`` and then
    ``RatingPredictionOptimizer``.

    Args:
        user_num: Passed as ``id_num`` to the user-side ``LatentFactorMapper``.
        item_num: Passed as ``id_num`` to the item-side ``LatentFactorMapper``.

    Returns:
        The ``RPRecommender`` built from the input node and optimizer output.
    """
    # Named input_node rather than `input` to avoid shadowing the builtin.
    input_node = Input(shape=[2])
    user_emb_mlp = LatentFactorMapper(feat_column_id=0,
                                      id_num=user_num,
                                      embedding_dim=64)(input_node)
    # The item mapper is sized by item_num. It previously received user_num,
    # which left the item_num parameter unused.
    item_emb_mlp = LatentFactorMapper(feat_column_id=1,
                                      id_num=item_num,
                                      embedding_dim=64)(input_node)
    output = MLPInteraction()([user_emb_mlp, item_emb_mlp])
    output = RatingPredictionOptimizer()(output)
    return RPRecommender(inputs=input_node, outputs=output)
def build_gmf(user_num, item_num):
    """Build a rating-prediction recommender with an inner-product interaction.

    Column 0 is mapped with ``num_of_entities=user_num`` and column 1 with
    ``num_of_entities=item_num``; the two embeddings are combined by
    ``InnerProductInteraction`` and passed to ``RatingPredictionOptimizer``.

    NOTE(review): this source holds another ``build_gmf`` that uses different
    keyword names (``feat_column_id``/``id_num``) - confirm which API applies.

    Returns:
        The ``RPRecommender`` built from the input node and optimizer output.
    """
    features = Input(shape=[2])

    user_mapper = LatentFactorMapper(column_id=0,
                                     num_of_entities=user_num,
                                     embedding_dim=64)
    user_vec = user_mapper(features)

    item_mapper = LatentFactorMapper(column_id=1,
                                     num_of_entities=item_num,
                                     embedding_dim=64)
    item_vec = item_mapper(features)

    interaction = InnerProductInteraction()([user_vec, item_vec])
    rating = RatingPredictionOptimizer()(interaction)
    return RPRecommender(inputs=features, outputs=rating)
def build_gmf(user_num, item_num):
    """Build a rating-prediction recommender with an element-wise interaction.

    Column 0 is mapped with ``id_num=user_num`` and column 1 with
    ``id_num=item_num``; the embeddings are combined by
    ``ElementwiseInteraction`` and passed to ``RatingPredictionOptimizer``.

    NOTE(review): this source holds another ``build_gmf`` that uses different
    keyword names (``column_id``/``num_of_entities``) - confirm which API
    applies.

    Returns:
        The ``RPRecommender`` built from the input node and optimizer output.
    """
    features = Input(shape=[2])

    user_mapper = LatentFactorMapper(feat_column_id=0,
                                     id_num=user_num,
                                     embedding_dim=64)
    user_vec = user_mapper(features)

    item_mapper = LatentFactorMapper(feat_column_id=1,
                                     id_num=item_num,
                                     embedding_dim=64)
    item_vec = item_mapper(features)

    # NOTE(review): the spelling "innerporduct" is kept verbatim; presumably
    # it must match the option string the library accepts - confirm.
    interactor = ElementwiseInteraction(elementwise_type="innerporduct")
    interaction = interactor([user_vec, item_vec])

    rating = RatingPredictionOptimizer()(interaction)
    return RPRecommender(inputs=features, outputs=rating)
def build_autorec(user_num, item_num):
    """Build a rating-prediction recommender around ``HyperInteraction``.

    Two independent user/item embedding pairs are created from the same
    input, in the order user, item, user, item, and handed to
    ``HyperInteraction`` as one list.

    Returns:
        The ``RPRecommender`` built from the input node and optimizer output.
    """
    features = Input(shape=[2])

    # (column_id, num_of_entities) for each mapper, in construction order.
    mapper_specs = [(0, user_num), (1, item_num)] * 2
    embeddings = [
        LatentFactorMapper(column_id=column,
                           num_of_entities=entity_count,
                           embedding_dim=64)(features)
        for column, entity_count in mapper_specs
    ]

    interaction = HyperInteraction()(embeddings)
    rating = RatingPredictionOptimizer()(interaction)
    return RPRecommender(inputs=features, outputs=rating)
def build_neumf(user_num, item_num):
    """Build a two-branch rating-prediction recommender.

    One user/item embedding pair feeds ``InnerProductInteraction`` and a
    second, separately created pair feeds ``MLPInteraction``; both branch
    outputs are passed together to ``RatingPredictionOptimizer``.

    Returns:
        The ``RPRecommender`` built from the input node and optimizer output.
    """
    features = Input(shape=[2])

    def embed_user_item():
        # Create a fresh [user, item] embedding pair on the shared input.
        user_vec = LatentFactorMapper(column_id=0,
                                      num_of_entities=user_num,
                                      embedding_dim=64)(features)
        item_vec = LatentFactorMapper(column_id=1,
                                      num_of_entities=item_num,
                                      embedding_dim=64)(features)
        return [user_vec, item_vec]

    # Each pair is built before its interactor, as in the original order.
    gmf_pair = embed_user_item()
    gmf_branch = InnerProductInteraction()(gmf_pair)

    mlp_pair = embed_user_item()
    mlp_branch = MLPInteraction()(mlp_pair)

    rating = RatingPredictionOptimizer()([gmf_branch, mlp_branch])
    return RPRecommender(inputs=features, outputs=rating)
# Load the dataset.
# Alternative data source (Netflix Prize), left disabled:
# dataset_paths = ["./examples/datasets/netflix-prize-data/combined_data_" + str(i) + ".txt" for i in range(1, 5)]
# data = NetflixPrizePreprocessor(dataset_paths)

# Step 1: preprocess MovieLens into train / validation / test splits.
movielens = MovielensPreprocessor()
train_X, train_y, val_X, val_y, test_X, test_y = movielens.preprocess()

# Categorical view of each split, taken in train, val, test order.
train_X_categorical, val_X_categorical, test_X_categorical = (
    movielens.get_x_categorical(split) for split in (train_X, val_X, test_X)
)
user_num, item_num = movielens.get_hash_size()

# Step 2: build the recommender, which provides the search space.
# Step 2.1: mappers that turn the two input columns into embeddings.
# NOTE(review): `input` shadows the builtin; the name is kept because code
# outside this fragment may reference it.
input = Input(shape=[2])
user_emb_gmf = LatentFactorMapper(column_id=0,
                                  num_of_entities=user_num,
                                  embedding_dim=64)(input)
item_emb_gmf = LatentFactorMapper(column_id=1,
                                  num_of_entities=item_num,
                                  embedding_dim=64)(input)
user_emb_mlp = LatentFactorMapper(column_id=0,
                                  num_of_entities=user_num,
                                  embedding_dim=64)(input)
item_emb_mlp = LatentFactorMapper(column_id=1,
                                  num_of_entities=item_num,
                                  embedding_dim=64)(input)

# Step 2.2: interactors that combine the embeddings.
innerproduct_output = InnerProductInteraction()([user_emb_gmf, item_emb_gmf])
# Step 1: preprocess data.
# Per the original author's note, the default arguments are set up to
# preprocess the Criteo example dataset.
criteo = CriteoPreprocessor()
train_X, train_y, val_X, val_y, test_X, test_y = criteo.preprocess()

# Numerical and categorical views of each split.
train_X_numerical = criteo.get_x_numerical(train_X)
train_X_categorical = criteo.get_x_categorical(train_X)
val_X_numerical = criteo.get_x_numerical(val_X)
val_X_categorical = criteo.get_x_categorical(val_X)
test_X_numerical = criteo.get_x_numerical(test_X)
test_X_categorical = criteo.get_x_categorical(test_X)

numerical_count = criteo.get_numerical_count()
categorical_count = criteo.get_categorical_count()
hash_size = criteo.get_hash_size()

# Step 2: build the recommender, which provides the search space.
# Step 2.1: mappers that handle the dense and sparse inputs.
dense_input_node = Input(shape=[numerical_count])
sparse_input_node = Input(shape=[categorical_count])
dense_feat_emb = DenseFeatureMapper(
    num_of_fields=numerical_count,
    embedding_dim=2)(dense_input_node)
sparse_feat_emb = SparseFeatureMapper(
    num_of_fields=categorical_count,
    hash_size=hash_size,
    embedding_dim=2)(sparse_input_node)

# Step 2.2: interactors. Attention runs over both embeddings, the bottom MLP
# over the dense one only, and the top MLP combines the two results.
attention_output = SelfAttentionInteraction()(
    [dense_feat_emb, sparse_feat_emb])
bottom_mlp_output = MLPInteraction()([dense_feat_emb])
top_mlp_output = MLPInteraction()([attention_output, bottom_mlp_output])

# Step 2.3: optimizer for the target task.
output = CTRPredictionOptimizer()(top_mlp_output)
# NOTE(review): this fragment begins inside a call whose opening lies outside
# the visible source (presumably `logging.basicConfig(`) - confirm.
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# load dataset
mini_criteo = np.load("./examples/datasets/criteo/criteo_2M.npz")

# TODO: preprocess train val split
# Both the 'X_int' and the 'X_cat' arrays are cast to float32.
train_X = [
    mini_criteo['X_int'].astype(np.float32),
    mini_criteo['X_cat'].astype(np.float32)
]
train_y = mini_criteo['y']
# Validation currently aliases the training data (see the TODO above).
val_X, val_y = train_X, train_y

# build the pipeline.
# The literals 13 and 26 appear both as input shapes and as num_of_fields.
dense_input_node = Input(shape=[13])
sparse_input_node = Input(shape=[26])
dense_feat_emb = DenseFeatureMapper(num_of_fields=13,
                                    embedding_dim=2)(dense_input_node)

# TODO: preprocess data to get sparse hash_size
# The hard-coded list has 26 entries, matching num_of_fields=26; presumably
# one hash size per sparse field - confirm.
sparse_feat_emb = SparseFeatureMapper(num_of_fields=26,
                                      hash_size=[
                                          1444, 555, 175781, 128509, 306, 19,
                                          11931, 630, 4, 93146, 5161, 174835,
                                          3176, 28, 11255, 165206, 11, 4606,
                                          2017, 4, 172322, 18, 16, 56456, 86,
                                          43356
                                      ],
                                      embedding_dim=2)(sparse_input_node)
# logging setting
# basicConfig() does nothing once the root logger has a handler, so it is
# called exactly once. A second call requesting DEBUG used to follow and was
# silently ignored; INFO is the level this script requests.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# load dataset
# Per the original author's note, the preprocessor is automatically set up
# for the Criteo dataset.
criteo = CriteoPreprocessor()
train_X, train_y, val_X, val_y, test_X, test_y = criteo.preprocess()

# build the pipeline.
# NOTE(review): `input` shadows the builtin; the name is kept because code
# outside this fragment may reference it.
input = Input(shape=[criteo.get_categorical_count()])
# NOTE(review): num_of_entities is hard-coded to 10000 for every mapper -
# confirm that this covers the id range of columns 0 and 1.
user_emb_gmf = LatentFactorMapper(column_id=0,
                                  num_of_entities=10000,
                                  embedding_dim=64)(input)
item_emb_gmf = LatentFactorMapper(column_id=1,
                                  num_of_entities=10000,
                                  embedding_dim=64)(input)

user_emb_mlp = LatentFactorMapper(column_id=0,
                                  num_of_entities=10000,
                                  embedding_dim=64)(input)
item_emb_mlp = LatentFactorMapper(column_id=1,
                                  num_of_entities=10000,
                                  embedding_dim=64)(input)

# One embedding pair feeds the inner-product branch, the other the MLP branch.
innerproduct_output = InnerProductInteraction()([user_emb_gmf, item_emb_gmf])
mlp_output = MLPInteraction()([user_emb_mlp, item_emb_mlp])