def test_save_and_load_model(self):
    """Round-trip a fitted model through save_model() / load_model().

    Scores and ranks must be identical right after saving, and again after
    the session and default graph are discarded and the model is reloaded
    from ``self.test_dir``.
    """
    fitted = TensorRec(n_components=10)
    fitted.fit(self.interactions, self.user_features, self.item_features, epochs=10)

    features = dict(user_features=self.user_features, item_features=self.item_features)

    def score(recommender):
        # Scores first, then ranks -- the same call order at every checkpoint.
        return recommender.predict(**features), recommender.predict_rank(**features)

    expected_scores, expected_ranks = score(fitted)
    fitted.save_model(directory_path=self.test_dir)

    # Saving must leave the live model's output untouched.
    scores_after_save, ranks_after_save = score(fitted)
    self.assertTrue((expected_scores == scores_after_save).all())
    self.assertTrue((expected_ranks == ranks_after_save).all())

    # Discard the current session and default graph before reloading.
    set_session(None)
    tf.reset_default_graph()

    # A model restored from disk must reproduce the original output.
    restored = TensorRec.load_model(directory_path=self.test_dir)
    restored_scores, restored_ranks = score(restored)

    self.assertTrue((expected_scores == restored_scores).all())
    self.assertTrue((expected_ranks == restored_ranks).all())
Exemple #2
0
 def test_fit_from_tfrecords(self):
     """Fit from the fixture's ``*_path`` attributes; passes if fit() does not raise."""
     set_session(None)
     recommender = TensorRec(n_components=10)
     input_paths = (
         self.interactions_path,
         self.user_features_path,
         self.item_features_path,
     )
     recommender.fit(*input_paths, epochs=10)
Exemple #3
0
    def test_save_and_load_model(self):
        """Check that save_model() / load_model() preserve predictions and ranks.

        The comparison is made twice: on the original model right after it is
        saved, and on a model reloaded from ``self.test_dir`` once the session
        and default graph have been reset.
        """
        original = TensorRec(n_components=10)
        original.fit(self.interactions, self.user_features, self.item_features, epochs=10)

        feature_kwargs = {
            'user_features': self.user_features,
            'item_features': self.item_features,
        }

        baseline_scores = original.predict(**feature_kwargs)
        baseline_ranks = original.predict_rank(**feature_kwargs)
        original.save_model(directory_path=self.test_dir)

        # The act of saving must not change what the model predicts.
        post_save_scores = original.predict(**feature_kwargs)
        post_save_ranks = original.predict_rank(**feature_kwargs)
        self.assertTrue((baseline_scores == post_save_scores).all())
        self.assertTrue((baseline_ranks == post_save_ranks).all())

        # Drop the session and reset the default graph before loading.
        set_session(None)
        tf.reset_default_graph()

        # The reloaded model must agree with the baseline.
        reloaded = TensorRec.load_model(directory_path=self.test_dir)
        reloaded_scores = reloaded.predict(**feature_kwargs)
        reloaded_ranks = reloaded.predict_rank(**feature_kwargs)

        self.assertTrue((baseline_scores == reloaded_scores).all())
        self.assertTrue((baseline_ranks == reloaded_ranks).all())
 def test_wmrb_loss(self):
     """Fit with WMRBLossGraph and 10 sampled items; passes if fit() does not raise."""
     recommender = TensorRec(loss_graph=WMRBLossGraph())
     recommender.fit(
         self.interactions, self.user_features, self.item_features,
         epochs=5, n_sampled_items=10)
 def setUpClass(cls):
     """Create class-wide fixtures: dummy data, a fitted model and its predicted ranks."""
     dummy_data = generate_dummy_data_with_indicator(
         num_users=10, num_items=12, interaction_density=.5)
     cls.interactions, cls.user_features, cls.item_features = dummy_data

     fitted = TensorRec(n_components=10)
     fitted.fit(cls.interactions, cls.user_features, cls.item_features, epochs=10)
     cls.model = fitted

     # Ranks are computed once here and stored on the class as ``cls.ranks``.
     cls.ranks = fitted.predict_rank(
         user_features=cls.user_features,
         item_features=cls.item_features)
 def test_balanced_wmrb_loss_biased(self):
     """Fit a biased model with BalancedWMRBLossGraph; passes if fit() does not raise."""
     recommender = TensorRec(loss_graph=BalancedWMRBLossGraph(), biased=True)
     recommender.fit(
         self.interactions, self.user_features, self.item_features,
         epochs=5, n_sampled_items=10)
Exemple #7
0
    def test_basic_usage(self):
        """Run the basic workflow end to end: build, fit, predict, print mean recall@10."""
        # Default-parameter model
        recommender = TensorRec()

        # Dummy data: 100 users, 150 items
        interactions, user_features, item_features = generate_dummy_data(
            num_users=100, num_items=150, interaction_density=.05)
        feature_kwargs = dict(user_features=user_features, item_features=item_features)

        recommender.fit(interactions, user_features, item_features, epochs=5, verbose=True)

        # Scores for user 75 against items 100, 101 and 102
        scores = recommender.predict(
            user_ids=[75, 75, 75], item_ids=[100, 101, 102], **feature_kwargs)

        # Recall at 10, averaged and printed
        recall_values = recall_at_k(recommender, interactions, k=10, **feature_kwargs)
        print(np.mean(recall_values))

        self.assertIsNotNone(scores)
Exemple #8
0
 def test_fit(self):
     """After fitting on dummy data, ``tf_prediction_dense`` must be set on the model."""
     dummy_data = generate_dummy_data(
         num_users=10, num_items=10, interaction_density=.5)
     recommender = TensorRec(n_components=10)
     recommender.fit(*dummy_data, epochs=10)
     # fit() is expected to have built the prediction node
     self.assertIsNotNone(recommender.tf_prediction_dense)
Exemple #9
0
	def test_fit_predict_unbiased(self):
		"""An unbiased model's predictions have shape (user rows, item rows)."""
		unbiased = TensorRec(n_components=10, biased=False)
		unbiased.fit(self.interactions, self.user_features, self.item_features, epochs=10)

		scores = unbiased.predict(
			user_features=self.user_features,
			item_features=self.item_features)
		expected_shape = (self.user_features.shape[0], self.item_features.shape[0])

		self.assertEqual(scores.shape, expected_shape)
Exemple #10
0
 def test_fit_interactions_as_dataset(self):
     """Fit with interactions wrapped as a dataset; passes if fit() does not raise."""
     interactions_dataset = create_tensorrec_dataset_from_sparse_matrix(self.interactions)
     recommender = TensorRec(n_components=10)
     recommender.fit(
         interactions_dataset, self.user_features, self.item_features,
         epochs=10)
Exemple #11
0
 def test_fit(self):
     """After fitting on the fixture data, ``tf_prediction`` must be set on the model."""
     recommender = TensorRec(n_components=10)
     recommender.fit(
         self.interactions, self.user_features, self.item_features,
         epochs=10)
     # fit() is expected to have built the prediction node
     self.assertIsNotNone(recommender.tf_prediction)
Exemple #12
0
 def setUpClass(cls):
     """Create class-wide fixtures: dummy data and a model fitted on it."""
     dummy_data = generate_dummy_data(
         num_users=10, num_items=12, interaction_density=.5)
     cls.interactions, cls.user_features, cls.item_features = dummy_data

     fitted = TensorRec(n_components=10)
     fitted.fit(cls.interactions, cls.user_features, cls.item_features, epochs=10)
     cls.model = fitted
Exemple #13
0
    def test_predict_user_repr_biased_fails(self):
        """predict_user_representation() must raise NotImplementedError on a default model."""
        recommender = TensorRec(n_components=10)
        recommender.fit(
            self.interactions, self.user_features, self.item_features,
            epochs=10)

        self.assertRaises(
            NotImplementedError,
            recommender.predict_user_representation,
            self.user_features)
Exemple #14
0
    def test_predict_item_repr(self):
        """Item representations of an unbiased model have shape (item rows, 10)."""
        unbiased = TensorRec(n_components=10, biased=False)
        unbiased.fit(
            self.interactions, self.user_features, self.item_features,
            epochs=10)

        representation = unbiased.predict_item_representation(self.item_features)
        expected_shape = (self.item_features.shape[0], 10)
        self.assertEqual(representation.shape, expected_shape)
Exemple #15
0
 def test_fit_from_datasets(self):
     """Fit with all three inputs wrapped as datasets; passes if fit() does not raise."""
     to_dataset = create_tensorrec_dataset_from_sparse_matrix
     # The second positional flag is forwarded exactly as given; only the
     # item features are converted with ``True``.
     user_dataset = to_dataset(self.user_features, False)
     item_dataset = to_dataset(self.item_features, True)
     interaction_dataset = to_dataset(self.interactions, False)

     recommender = TensorRec(n_components=10)
     recommender.fit(interaction_dataset, user_dataset, item_dataset, epochs=10)
Exemple #16
0
    def metric_test(self):
        """Benchmark ``metrics`` recall/precision@k against the ``eval`` module.

        A TensorRec model is fitted on dummy indicator-feature data, ranks are
        predicted, and recall@k / precision@k are computed with both ``eval``
        and ``metrics``. The mean of each metric must agree between the two.
        """
        k = 10
        latent_factor = 10
        n_users = 10
        n_items = 12

        interactions, user_features, item_features = util.generate_dummy_data_with_indicator(
            num_users=n_users, num_items=n_items, interaction_density=.5)
        print("interactiosn shape={}".format(np.shape(interactions)))
        print("user features shape={}".format(np.shape(
            user_features.toarray())))
        print("item features shape={}".format(np.shape(
            item_features.toarray())))

        model = TensorRec(n_components=latent_factor)

        model.fit(interactions, user_features, item_features, epochs=19)

        ranks = model.predict_rank(user_features=user_features,
                                   item_features=item_features)

        print("Ranks shape={}".format(np.shape(ranks)))

        # assertEqual reports both shapes when they differ.
        self.assertEqual(np.shape(interactions), np.shape(ranks))

        tr_recall_result = eval.recall_at_k(predicted_ranks=ranks,
                                            test_interactions=interactions,
                                            k=k,
                                            preserve_rows=False)

        tr_precision_result = eval.precision_at_k(
            predicted_ranks=ranks,
            test_interactions=interactions,
            k=k,
            preserve_rows=False)

        # ``metrics`` is handed the interactions in CSR form.
        interactions_ = interactions.tocsr()
        recall_result = metrics.recall_at_k(ranks,
                                            interactions_,
                                            k=k,
                                            preserve_rows=False)

        precision_result = metrics.precision_at_k(ranks,
                                                  interactions_,
                                                  k=k,
                                                  preserve_rows=False)

        # The means are floats produced by two separate implementations, so
        # compare with a tolerance rather than exact equality.
        self.assertAlmostEqual(tr_recall_result.mean(), recall_result.mean())
        self.assertAlmostEqual(tr_precision_result.mean(), precision_result.mean())
Exemple #17
0
    def test_fit_fail_batching_dataset(self):
        """fit() with ``user_batch_size`` and dataset interactions raises BatchNonSparseInputException."""
        recommender = TensorRec(n_components=10)
        dataset = create_tensorrec_dataset_from_sparse_matrix(self.interactions)

        self.assertRaises(
            BatchNonSparseInputException,
            recommender.fit,
            dataset,
            self.user_features,
            self.item_features,
            epochs=10,
            user_batch_size=2)
Exemple #18
0
 def setUpClass(cls):
     """Create class-wide fixtures: dummy data, a fitted model and its predicted ranks."""
     dummy_data = generate_dummy_data_with_indicator(
         num_users=10, num_items=12, interaction_density=.5)
     cls.interactions, cls.user_features, cls.item_features = dummy_data

     fitted = TensorRec(n_components=10)
     fitted.fit(cls.interactions, cls.user_features, cls.item_features, epochs=10)
     cls.model = fitted

     # Ranks are computed once here and stored on the class as ``cls.ranks``.
     feature_kwargs = dict(user_features=cls.user_features,
                           item_features=cls.item_features)
     cls.ranks = fitted.predict_rank(**feature_kwargs)
Exemple #19
0
    def test_predict(self):
        """Full predictions on dummy data have shape (user rows, item rows)."""
        interactions, user_features, item_features = generate_dummy_data(
            num_users=10, num_items=20, interaction_density=.5)
        recommender = TensorRec(n_components=10)
        recommender.fit(interactions, user_features, item_features, epochs=10)

        scores = recommender.predict(
            user_features=user_features, item_features=item_features)
        expected_shape = (user_features.shape[0], item_features.shape[0])

        self.assertEqual(scores.shape, expected_shape)
Exemple #20
0
 def test_wmrb_loss(self):
     """Fit a stratified-sampling WMRB model with logging enabled.

     Inputs are converted to CSR and the item features are truncated to the
     first ``self.n_test_item`` rows. Passes if construction and fit() do
     not raise.
     """
     # Local imports keep this block self-contained.
     import shutil
     import tempfile

     # Log to a throwaway directory rather than a developer-specific absolute
     # path, so the test runs on any machine and leaves nothing behind.
     logdir = tempfile.mkdtemp()
     try:
         model = TensorRec(loss_graph=WMRBLossGraph(),
                           stratified_sample=True,
                           logdir=logdir,
                           log_interval=100)
         model.fit(self.interactions.tocsr(),
                   self.user_features.tocsr(),
                   self.item_features.tocsr()[:self.n_test_item],
                   epochs=10,
                   verbose=True,
                   train_threads=5,
                   use_reg=True)
     finally:
         shutil.rmtree(logdir, ignore_errors=True)
Exemple #21
0
    def test_predict(self):
        """Predicting for three (user id, item id) pairs returns three values."""
        interactions, user_features, item_features = generate_dummy_data(
            num_users=10, num_items=10, interaction_density=.5)
        recommender = TensorRec(n_components=10)
        recommender.fit(interactions, user_features, item_features, epochs=10)

        requested_users = [1, 2, 3]
        requested_items = [4, 5, 6]
        scores = recommender.predict(
            user_ids=requested_users,
            item_ids=requested_items,
            user_features=user_features,
            item_features=item_features)

        self.assertEqual(len(scores), 3)
Exemple #22
0
    def test_movie_lens_fit_wmrb(self):
        """
        Fit a ``wmrb_loss`` model on the ``self.movielens_100k`` fixture and check that predict()
        returns something other than None.
        """
        # The fixture unpacks to four parts; the test interactions are not used here.
        train_interactions, _test_interactions, user_features, item_features = self.movielens_100k
        feature_kwargs = dict(user_features=user_features, item_features=item_features)

        recommender = TensorRec(loss_graph=wmrb_loss)
        recommender.fit(interactions=train_interactions, **feature_kwargs)
        scores = recommender.predict(**feature_kwargs)

        self.assertIsNotNone(scores)
    def test_save_and_load_model_same_session(self):
        """Save a fitted model and reload it without resetting the session.

        The reloaded model must produce exactly the same predictions and ranks
        as the original.
        """
        original = TensorRec(n_components=10)
        original.fit(self.interactions, self.user_features, self.item_features, epochs=10)

        feature_kwargs = dict(user_features=self.user_features, item_features=self.item_features)

        baseline_scores = original.predict(**feature_kwargs)
        baseline_ranks = original.predict_rank(**feature_kwargs)
        original.save_model(directory_path=self.test_dir)

        # Load straight away, with the existing session still in place.
        reloaded = TensorRec.load_model(directory_path=self.test_dir)
        reloaded_scores = reloaded.predict(**feature_kwargs)
        reloaded_ranks = reloaded.predict_rank(**feature_kwargs)

        self.assertTrue((baseline_scores == reloaded_scores).all())
        self.assertTrue((baseline_ranks == reloaded_ranks).all())
Exemple #24
0
	def test_save_and_load_model_same_session(self):
		"""Save a fitted model and reload it without resetting the session.

		The reloaded model must produce exactly the same predictions and ranks
		as the original, compared element by element.
		"""
		model = TensorRec(n_components=10)
		model.fit(self.interactions, self.user_features, self.item_features, epochs=10)

		predictions = model.predict(user_features=self.user_features, item_features=self.item_features)
		ranks = model.predict_rank(user_features=self.user_features, item_features=self.item_features)
		model.save_model(directory_path=self.test_dir)

		# Reload the model, predict, and check for equal predictions
		new_model = TensorRec.load_model(directory_path=self.test_dir)
		new_predictions = new_model.predict(user_features=self.user_features, item_features=self.item_features)
		new_ranks = new_model.predict_rank(user_features=self.user_features, item_features=self.item_features)

		# Compare element-wise. ``a.all() == b.all()`` only compares two
		# scalar truthiness values and would pass for unrelated arrays.
		self.assertTrue((predictions == new_predictions).all())
		self.assertTrue((ranks == new_ranks).all())
    def test_movie_lens_fit(self):
        """
        Fit a default model for 5 epochs on the ``self.movielens_100k`` fixture and check that
        predict() returns something other than None.
        """
        # The fixture unpacks to five parts; only three of them are used here.
        train_interactions, _test_interactions, user_features, item_features, _ = self.movielens_100k
        feature_kwargs = dict(user_features=user_features, item_features=item_features)

        recommender = TensorRec()
        recommender.fit(interactions=train_interactions, epochs=5, **feature_kwargs)
        scores = recommender.predict(**feature_kwargs)

        self.assertIsNotNone(scores)
    def test_fit(self, name, user_repr, item_repr, n_user_features,
                 n_item_features, n_components):
        """Fit a model built from the given representation graph factories.

        ``user_repr`` and ``item_repr`` are called with no arguments to create
        the graphs. ``name`` is accepted but not used in the body. After
        fitting, ``tf_prediction`` must be set on the model.
        """
        data_spec = dict(
            num_users=15,
            num_items=30,
            interaction_density=.5,
            num_user_features=n_user_features,
            num_item_features=n_item_features,
            n_features_per_user=20,
            n_features_per_item=20,
            pos_int_ratio=.5,
        )
        interactions, user_features, item_features = generate_dummy_data(**data_spec)

        recommender = TensorRec(
            n_components=n_components,
            user_repr_graph=user_repr(),
            item_repr_graph=item_repr())
        recommender.fit(interactions, user_features, item_features, epochs=10)

        # fit() is expected to have built the prediction node
        self.assertIsNotNone(recommender.tf_prediction)
Exemple #27
0
 def test_fit_fail_bad_input(self):
     """fit() must raise ValueError when any one input is a plain numpy array."""
     recommender = TensorRec(n_components=10)
     valid_inputs = (self.interactions, self.user_features, self.item_features)

     # Swap a numpy array into each position in turn: interactions, user
     # features, then item features.
     for bad_position in range(len(valid_inputs)):
         with self.assertRaises(ValueError):
             fit_args = list(valid_inputs)
             fit_args[bad_position] = np.array([1, 2, 3, 4])
             recommender.fit(*fit_args, epochs=10)
    def test_fit_fail_on_bad_dims(self):
        """Building and fitting with a pass-through graph on either side must raise ValueError.

        The data has 30 user features and 20 item features while the model asks
        for 25 components. Both pairings of FeaturePassThroughRepresentationGraph
        with LinearRepresentationGraph are tried.
        """
        interactions, user_features, item_features = generate_dummy_data(
            num_users=15,
            num_items=30,
            interaction_density=.5,
            num_user_features=30,
            num_item_features=20,
            n_features_per_user=20,
            n_features_per_item=20,
            pos_int_ratio=.5)

        graph_pairings = (
            (FeaturePassThroughRepresentationGraph, LinearRepresentationGraph),
            (LinearRepresentationGraph, FeaturePassThroughRepresentationGraph),
        )
        for user_graph_cls, item_graph_cls in graph_pairings:
            # Construction and fit both stay inside the assertion context.
            with self.assertRaises(ValueError):
                recommender = TensorRec(
                    n_components=25,
                    user_repr_graph=user_graph_cls(),
                    item_repr_graph=item_graph_cls())
                recommender.fit(interactions, user_features, item_features, epochs=10)
Exemple #29
0
    def test_fit_fail_mismatched_batches(self):
        """Exercise fit() with lists of inputs.

        A single interactions matrix combined with lists of user and item
        features must raise ValueError. A two-element interactions list with a
        two-element user-feature list must fit without raising, whether the
        item features are a single matrix or a two-element list.
        """
        def batches(matrix, count):
            # A fresh list holding ``count`` references to the same matrix.
            return [matrix for _ in range(count)]

        recommender = TensorRec(n_components=10)

        # Rejected: unbatched interactions with 2 user / 3 item batches.
        with self.assertRaises(ValueError):
            recommender.fit(
                self.interactions,
                batches(self.user_features, 2),
                batches(self.item_features, 3),
                epochs=10)

        # Rejected: unbatched interactions with 2 user / 2 item batches.
        with self.assertRaises(ValueError):
            recommender.fit(
                self.interactions,
                batches(self.user_features, 2),
                batches(self.item_features, 2),
                epochs=10)

        # Must not raise: 2 interaction batches, 2 user batches, one item matrix.
        recommender.fit(
            batches(self.interactions, 2),
            batches(self.user_features, 2),
            self.item_features,
            epochs=10)

        # Must not raise: 2 batches of everything.
        recommender.fit(
            batches(self.interactions, 2),
            batches(self.user_features, 2),
            batches(self.item_features, 2),
            epochs=10)
    def test_fit_fail_mismatched_batches(self):
        """Exercise fit() with lists of inputs.

        The first two calls pass one interactions matrix alongside lists of
        user and item features and must raise ValueError. The last two pass a
        list of two interaction matrices and must complete without raising.
        """
        recommender = TensorRec(n_components=10)

        def pair(matrix):
            # A new two-element list on every call.
            return [matrix, matrix]

        def triple(matrix):
            # A new three-element list on every call.
            return [matrix, matrix, matrix]

        with self.assertRaises(ValueError):
            recommender.fit(
                self.interactions,
                pair(self.user_features),
                triple(self.item_features),
                epochs=10)

        with self.assertRaises(ValueError):
            recommender.fit(
                self.interactions,
                pair(self.user_features),
                pair(self.item_features),
                epochs=10)

        # Lists of interactions: these two calls are expected to succeed.
        recommender.fit(
            pair(self.interactions),
            pair(self.user_features),
            self.item_features,
            epochs=10)

        recommender.fit(
            pair(self.interactions),
            pair(self.user_features),
            pair(self.item_features),
            epochs=10)
 def test_fit_from_datasets(self):
     """Fit with all three inputs wrapped as datasets; passes if fit() does not raise."""
     to_dataset = create_tensorrec_dataset_from_sparse_matrix
     user_dataset = to_dataset(self.user_features)
     item_dataset = to_dataset(self.item_features)
     interaction_dataset = to_dataset(self.interactions)

     recommender = TensorRec(n_components=10)
     recommender.fit(interaction_dataset, user_dataset, item_dataset, epochs=10)
 def test_fit_from_tfrecords(self):
     """Fit from the fixture's ``*_path`` attributes; passes if fit() does not raise."""
     set_session(None)
     recommender = TensorRec(n_components=10)
     recommender.fit(
         self.interactions_path,
         self.user_features_path,
         self.item_features_path,
         epochs=10)
 def test_fit_verbose(self):
     """After fitting with ``verbose=True``, ``tf_prediction`` must be set on the model."""
     recommender = TensorRec(n_components=10)
     recommender.fit(
         self.interactions, self.user_features, self.item_features,
         epochs=10, verbose=True)
     # fit() is expected to have built the prediction node
     self.assertIsNotNone(recommender.tf_prediction)
 def test_fit_batched(self):
     """After fitting with ``user_batch_size=2``, ``tf_prediction`` must be set on the model."""
     recommender = TensorRec(n_components=10)
     recommender.fit(
         self.interactions, self.user_features, self.item_features,
         epochs=10, user_batch_size=2)
     # fit() is expected to have built the prediction node
     self.assertIsNotNone(recommender.tf_prediction)
Exemple #35
0
 def test_separation_loss(self):
     """Fit with ``separation_loss`` as the loss graph; passes if fit() does not raise."""
     recommender = TensorRec(loss_graph=separation_loss)
     recommender.fit(
         self.interactions, self.user_features, self.item_features,
         epochs=5)
Exemple #36
0
def main():
    """Build an RFMV-clustered retail dataset, train a TensorRec model and export item ranks.

    Steps, in order:
      1. Read ./data/Transactions.csv, ./data/Customer.csv and
         ./data/prod_cat_info.csv.
      2. Derive per-customer Recency / Frequency / Monetary / Variety values,
         score each against its quartiles and cluster the summed score with
         KMeans (4 clusters).
      3. For customers whose Recency is below 365, build min-max scaled
         interaction, user-feature and item-feature matrices.
      4. Fit a TensorRec model, print mean recall@80 for the train and test
         splits, and write the predicted ranks to ./result/ranks_df.csv.

    Relies on names that are not defined inside this function: ``now``,
    ``RecencyScore``, ``FMVScore`` and ``interaction_masking``.
    """
    # ---- Load data ----

    ## Master data (interactions)
    masterdf = pd.read_csv('./data/Transactions.csv')
    # Rename the columns to a clean, standard set of names
    masterdf.columns = ['Transaction ID', 'Customer ID', 'Transaction Date', 'Prod Subcat Code',
            'Prod Cat Code', 'Qty', 'Rate', 'Tax', 'Total Amt', 'Store Type']

    # Encode the store type as an integer code in a new column
    masterdf['Store Type Code'] = pd.factorize(masterdf['Store Type'])[0]

    # Normalise the transaction date through a pandas DatetimeIndex (day-first parsing)
    masterdf['Date'] = pd.DatetimeIndex(masterdf['Transaction Date'], dayfirst=True).date

    # Net sales from quantity and base rate (tax excluded, since it may differ by city)
    masterdf['Net Sales'] = masterdf['Qty'] * masterdf['Rate']

    # Unique material identifier built from category, sub-category and store type
    masterdf['Material'] = masterdf['Prod Cat Code'].astype(str) + '-' + masterdf['Prod Subcat Code'].astype(str) + '-' + masterdf['Store Type'].astype(str)

    ## Customer data (customer attributes)
    custdf = pd.read_csv('./data/Customer.csv')
    custdf.columns = ['Customer ID', 'DOB', 'Gender', 'City Code']

    ## Item feature data
    skudf = pd.read_csv('./data/prod_cat_info.csv')
    skudf.columns = ['Prod Cat Code', 'Prod Cat', 'Prod Sub Cat Code', 'Prod Subcat']

    # ---- Derive RFMV features ----

    ## RECENCY
    recency_df = masterdf.groupby('Customer ID').Date.max().reset_index()
    recency_df.columns = ['Customer ID', 'Last Purchase']
    # NOTE(review): `now` is not defined in this function; it has to exist at
    # module level and support subtraction with the date values in 'Date' -- confirm.
    recency_df['Recency'] = recency_df['Last Purchase'].apply(lambda x: (now - x).days)
    recency_df = recency_df[['Customer ID', 'Recency']]

    ## FREQUENCY
    frequency_df = masterdf.groupby('Customer ID')['Date'].count().reset_index()
    frequency_df.columns = ['Customer ID', 'Frequency']

    ## MONETARY
    monetary_df = masterdf.groupby('Customer ID')['Net Sales'].sum().reset_index()
    monetary_df.columns = ['Customer ID', 'Monetary']

    ## VARIETY
    variety_df = masterdf.groupby('Customer ID')['Material'].nunique().reset_index()
    variety_df.columns = ['Customer ID', 'Variety']

    ## RFMV
    rfmv = recency_df.copy()
    rfmv = rfmv.merge(frequency_df, on='Customer ID')
    rfmv = rfmv.merge(monetary_df, on='Customer ID')
    rfmv = rfmv.merge(variety_df, on='Customer ID')

    # 25% / 50% / 75% quantiles of R, F, M and V, stored as a dict
    rfmv_quantiles = rfmv.iloc[:, 1:].quantile(q=[0.25, 0.5, 0.75]).to_dict()

    rfmv2 = rfmv.copy()
    rfmv2['R_q'] = rfmv2['Recency'].apply(RecencyScore, args=('Recency', rfmv_quantiles))
    rfmv2['F_q'] = rfmv2['Frequency'].apply(FMVScore, args=('Frequency', rfmv_quantiles))
    rfmv2['M_q'] = rfmv2['Monetary'].apply(FMVScore, args=('Monetary', rfmv_quantiles))
    rfmv2['V_q'] = rfmv2['Variety'].apply(FMVScore, args=('Variety', rfmv_quantiles))

    rfmv2 = rfmv2[['Customer ID', 'R_q', 'F_q', 'M_q', 'V_q']]

    ## Total score: sum of the four component scores
    rfmv2['Total_Score'] = rfmv2['R_q'] + rfmv2['F_q'] + rfmv2['M_q'] + rfmv2['V_q']

    rfmv2 = rfmv2[['Customer ID', 'Total_Score']]

    # IMPORTANT: index the frame by customer ID
    rfmv2.index = rfmv2['Customer ID']
    # `axis` must be passed by keyword: pandas 2.x no longer accepts it positionally.
    rfmv2 = rfmv2.drop('Customer ID', axis=1)

    # Elbow method for choosing the number of clusters (this step still needs tuning).
    # NOTE(review): `wcss` is never read afterwards. The loop is kept because the
    # fits are unseeded and so may draw from NumPy's global random state -- confirm
    # before removing.
    wcss = []
    for i in range(2, 10):
        kmeans = KMeans(n_clusters=i,
                        init='k-means++')
        kmeans.fit(rfmv2)
        wcss.append(kmeans.inertia_)

    # Apply KMeans with the cluster count chosen from the elbow plot
    kmeans = KMeans(n_clusters=4,
                    init='random',
                    random_state=None)

    clusters = kmeans.fit_predict(rfmv2)

    ### Attach the cluster labels to the original rfmv frame
    rfmv['Clusters'] = clusters

    # ---- Recommendation weights ----
    # Only customers active within the last year (Recency < 365) get recommendations
    active_cust = rfmv[rfmv.Recency < 365]

    # Join the cluster label onto the master data
    cleaned_df = masterdf.merge(active_cust[['Customer ID', 'Clusters']], how='left', on='Customer ID')
    # Drop rows without a cluster (customers that are not active)
    cleaned_df = cleaned_df[cleaned_df['Clusters'].notnull()]
    # Add customer data
    cleaned_df = cleaned_df.merge(custdf[['Customer ID', 'City Code']], how='left', on='Customer ID')
    # Join the SKU feature (product category) onto the master data.
    # NOTE(review): skudf also carries sub-category columns, so 'Prod Cat Code' may
    # repeat there and this merge may duplicate transaction rows -- confirm.
    cleaned_df = cleaned_df.merge(skudf[['Prod Cat', 'Prod Cat Code']], how='left', on='Prod Cat Code')

    # Keep only the required columns
    final_cleaned_df = cleaned_df[['Prod Cat', 'Material', 'Qty', 'Customer ID', 'Clusters']]

    # One row per (customer, category, material, cluster): duplicates are summed
    cust_grouped = final_cleaned_df.groupby(['Customer ID',
                                             'Prod Cat',
                                             'Material',
                                             'Clusters']).sum().reset_index()

    ## Interaction matrix
    interactions = cust_grouped.groupby(['Customer ID', 'Material'])['Qty'].sum().unstack().fillna(0)

    minmaxscaler = preprocessing.MinMaxScaler()
    interactions_scaled = minmaxscaler.fit_transform(interactions)
    interactions_scaled = pd.DataFrame(interactions_scaled)

    interactions_scaled.index = interactions.index
    interactions_scaled.columns = interactions.columns

    ## User features matrix
    cust_qty = cust_grouped.groupby(['Customer ID', 'Prod Cat'])['Qty'].sum().unstack().fillna(0)

    minmaxscaler = preprocessing.MinMaxScaler()
    cust_qty_scaled = minmaxscaler.fit_transform(cust_qty)
    cust_qty_scaled = pd.DataFrame(cust_qty_scaled)
    cust_qty_scaled.index = cust_qty.index
    cust_qty_scaled.columns = cust_qty.columns

    cust_clus = cust_grouped.groupby(['Customer ID', 'Clusters'])['Clusters'].nunique().unstack().fillna(0)

    customer_features = pd.merge(cust_qty_scaled, cust_clus, left_index=True, right_index=True, how='inner')
    customer_features = customer_features.rename(columns={0: 'Cluster 0',
                                                          1: 'Cluster 1',
                                                          2: 'Cluster 2',
                                                          3: 'Cluster 3',
                                                          4: 'Cluster 4'})

    ### Item features matrix
    item_category = pd.DataFrame(cust_grouped.groupby(['Material',
                                                       'Prod Cat'])['Qty'].sum().unstack().fillna(0).reset_index().set_index('Material'))

    minmaxscaler = preprocessing.MinMaxScaler()
    item_category_scaled = minmaxscaler.fit_transform(item_category)
    item_category_scaled = pd.DataFrame(item_category_scaled)
    item_category_scaled.index = item_category.index
    item_category_scaled.columns = item_category.columns

    interaction_f = sparse.coo_matrix(interactions_scaled)
    user_f = sparse.coo_matrix(customer_features)
    item_f = sparse.coo_matrix(item_category_scaled)

    mask_size = len(interaction_f.data)

    # NOTE(review): the result of this draw is discarded. The call is kept because
    # it advances NumPy's global random state, which later steps may depend on.
    np.random.choice(a=[False, True],
                     size=mask_size,
                     p=[.2, .8])

    ## Train / test split
    train_interactions, test_interactions = interaction_masking(interaction_f)

    user_features = user_f
    item_features = item_f

    # ---- Train ----

    ## Model parameters
    epochs = 100
    alpha = 0.01
    n_components = 10

    learning_rate = 0.01
    n_sampled_items = int(item_features.shape[0] * .1)
    biased = False

    model = TensorRec(n_components=n_components,
                      user_repr_graph=DeepRepresentationGraph(),
                      item_repr_graph=NormalizedLinearRepresentationGraph(),
                      loss_graph=WMRBLossGraph(),
                      biased=biased)

    model.fit(train_interactions,
              user_features,
              item_features,
              epochs=epochs,
              verbose=False,
              alpha=alpha,
              n_sampled_items=n_sampled_items,
              learning_rate=learning_rate)

    predicted_ranks = model.predict_rank(user_features=user_features,
                                         item_features=item_features)

    r_at_k_test = recall_at_k(predicted_ranks, test_interactions, k=80)
    r_at_k_train = recall_at_k(predicted_ranks, train_interactions, k=80)
    print("Recall at @k: Train: {:.2f} Test: {:.2f}".format(r_at_k_train.mean(), r_at_k_test.mean()))

    # Turn the ranks into a readable table: rows are materials, columns are customers
    ranks_df = pd.DataFrame(predicted_ranks)
    ranks_df.columns = item_category_scaled.index
    ranks_df.index = customer_features.index
    ranks_df = ranks_df.T

    ranks_df.to_csv('./result/ranks_df.csv')
 def test_dot_product(self):
     """Fit with DotProductPredictionGraph; passes if fit() does not raise."""
     recommender = TensorRec(prediction_graph=DotProductPredictionGraph())
     recommender.fit(
         self.interactions, self.user_features, self.item_features,
         epochs=5)
 def test_dot_product(self):
     """Fit with DotProductPredictionGraph; passes if fit() does not raise."""
     prediction_graph = DotProductPredictionGraph()
     recommender = TensorRec(prediction_graph=prediction_graph)
     recommender.fit(self.interactions, self.user_features, self.item_features, epochs=5)
    def test_fit_fail_batching_dataset(self):
        """fit() with ``user_batch_size`` and dataset interactions must raise ValueError."""
        recommender = TensorRec(n_components=10)
        dataset = create_tensorrec_dataset_from_sparse_matrix(self.interactions)

        self.assertRaises(
            ValueError,
            recommender.fit,
            dataset,
            self.user_features,
            self.item_features,
            epochs=10,
            user_batch_size=2)
 def test_balanced_wmrb_loss_biased(self):
     """Fit a biased model with BalancedWMRBLossGraph; passes if fit() does not raise."""
     recommender = TensorRec(loss_graph=BalancedWMRBLossGraph(), biased=True)
     recommender.fit(
         self.interactions,
         self.user_features,
         self.item_features,
         epochs=5,
         n_sampled_items=10)
 def test_wmrb_loss(self):
     """Fit with WMRBLossGraph and 10 sampled items; passes if fit() does not raise."""
     recommender = TensorRec(loss_graph=WMRBLossGraph())
     recommender.fit(
         self.interactions,
         self.user_features,
         self.item_features,
         epochs=5,
         n_sampled_items=10)
 def test_rmse_dense_loss_biased(self):
     """Fit a biased model with RMSEDenseLossGraph; passes if fit() does not raise."""
     recommender = TensorRec(loss_graph=RMSEDenseLossGraph(), biased=True)
     recommender.fit(
         self.interactions,
         self.user_features,
         self.item_features,
         epochs=5)
 def test_rmse_loss(self):
     """Fit with RMSELossGraph; passes if fit() does not raise."""
     recommender = TensorRec(loss_graph=RMSELossGraph())
     recommender.fit(
         self.interactions,
         self.user_features,
         self.item_features,
         epochs=5)
Exemple #44
0
 def test_wmrb_loss_biased(self):
     """Fit a biased model with ``wmrb_loss``; passes if fit() does not raise."""
     recommender = TensorRec(loss_graph=wmrb_loss, biased=True)
     recommender.fit(self.interactions, self.user_features, self.item_features, epochs=5)
 def test_cos_distance(self):
     """Fit with CosineSimilarityPredictionGraph; passes if fit() does not raise."""
     recommender = TensorRec(prediction_graph=CosineSimilarityPredictionGraph())
     recommender.fit(
         self.interactions,
         self.user_features,
         self.item_features,
         epochs=5)
 def test_cos_distance(self):
     """Fit with CosineSimilarityPredictionGraph; passes if fit() does not raise."""
     prediction_graph = CosineSimilarityPredictionGraph()
     recommender = TensorRec(prediction_graph=prediction_graph)
     recommender.fit(self.interactions, self.user_features, self.item_features, epochs=5)