Beispiel #1
0
    def test_winsorize(self):
        """Exercise ``winsorize_quantTrans`` on three five-row inputs.

        Every case uses a single store and a single SKU, applies the
        transform with the listed keyword arguments, and expects
        ``rate.quantity`` to equal the expected values element by element.
        """
        cases = [
            # Winsorizing at top and bottom 20%
            ([1, 2, 3, 4, 5],
             {'lower': 0.2, 'upper': 0.2},
             [2, 2, 3, 4, 4]),
            # Winsorizing at top and bottom 50% with zeros being ignored
            ([0, 0, 3, 4, 5],
             {'lower': 0.5, 'upper': 0.5},
             [0, 0, 4, 4, 4]),
            # Winsorizing at top and bottom 20% with zeros being included
            ([0, 2, 3, 4, 5],
             {'lower': 0.2, 'upper': 0.2, 'ignore_zero': False},
             [2, 2, 3, 4, 4]),
        ]
        for quantities, kwargs, expected in cases:
            frame = pd.DataFrame({
                'TDLINX_STORE_CD': ['A1234'] * 5,
                'MASTER_PKG_SKU_CD': ['1234'] * 5,
                'L90_TY_QTY': quantities,
            })
            rate = ratings(frame)
            rate.winsorize_quantTrans(**kwargs)
            # Count the positions that match; all of them must.
            matches = rate.quantity == np.array(expected)
            self.assertEqual(sum(matches), len(expected))
Beispiel #2
0
    def test_sparseMatrix(self):
        """Check that ``sparse_matrix`` pivots (store, product, quantity) rows.

        Two stores and three products are expected to yield a 2x3 matrix
        whose cells hold the quantities, with 0 where a store has no row
        for a product.
        """
        stores = ['A1234', 'A1234', 'B1234', 'B1234', 'B1234']
        quantities = [10, 20, 30, 40, 50]

        # Case 1: products appear in the same order for both stores.
        frame = pd.DataFrame({
            'TDLINX_STORE_CD': stores,
            'MASTER_PKG_SKU_CD': ['1234', '5678', '1234', '5678', '9012'],
            'L90_TY_QTY': quantities,
        })
        pivoted = ratings(frame).sparse_matrix()
        self.assertTrue(pivoted.shape == (2, 3))
        wanted = np.array([[10., 20., 0.],
                           [30., 40., 50.]])
        cell_hits = (pivoted.toarray() == wanted).ravel()
        self.assertEqual(sum(cell_hits), 6)

        # Case 2: the second store lists its products out of order; the
        # pivot must still place each quantity in its product's column.
        frame = pd.DataFrame({
            'TDLINX_STORE_CD': stores,
            'MASTER_PKG_SKU_CD': ['1234', '5678', '5678', '1234', '9012'],
            'L90_TY_QTY': quantities,
        })
        pivoted = ratings(frame).sparse_matrix()
        wanted = np.array([[10., 20., 0.],
                           [40., 30., 50.]])
        cell_hits = (pivoted.toarray() == wanted).ravel()
        self.assertEqual(sum(cell_hits), 6)
Beispiel #3
0
	def __init__(self):
		"""Load the per-movie matrix from the pickle cache, or build and cache it.

		If ``movie_matrix.p`` exists in the working directory it is unpickled
		into ``self.matrix`` and nothing else is done. Otherwise the movies,
		ratings and tags CSVs are loaded and, for every key of
		``mvs.gen_mat``, the ``gen_mat``, ``year_mat``, ``tag_mov_mat`` and
		``mov_rat_mat`` rows are joined with ``+`` (list concatenation,
		assuming the rows are lists). A key missing from one of the last
		three tables gets a row of zeros as wide as that table's first row.
		The result is pickled to ``movie_matrix.p`` and stored on
		``self.matrix``.
		"""
		fname = 'movie_matrix.p'
		if os.path.isfile(fname):
			# NOTE: unpickling can execute arbitrary code; this is only safe
			# while the cache file is written by this program alone.
			with open(fname, "rb") as cache:
				self.matrix = pickle.load(cache)
			return

		# NOTE(review): the backslash separators make these paths
		# Windows-specific; kept unchanged to preserve behavior.
		mvs = movies('ds\\movies.csv')
		rats = ratings('ds\\ratings.csv')
		tgs = tags('ds\\tags.csv')

		def zero_row(table):
			# Zero padding as wide as the table's first row; an empty table
			# yields an empty padding row instead of raising IndexError.
			first = next(iter(table.values()), ())
			return [0] * len(first)

		# Padding rows are loop-invariant, so build them once. They are
		# only ever read (``+`` creates a new list), so sharing is safe.
		no_year = zero_row(mvs.year_mat)
		no_tags = zero_row(tgs.tag_mov_mat)
		no_rating = zero_row(rats.mov_rat_mat)

		matrix = {}
		for movie_id in mvs.gen_mat:
			matrix[movie_id] = (
				mvs.gen_mat[movie_id]
				+ mvs.year_mat.get(movie_id, no_year)
				+ tgs.tag_mov_mat.get(movie_id, no_tags)
				+ rats.mov_rat_mat.get(movie_id, no_rating)
			)

		with open(fname, "wb") as cache:
			pickle.dump(matrix, cache)
		self.matrix = matrix
Beispiel #4
0
 def test_natLog(self):
     """Check ``natLog_rateTrans``: ratings for [0, 0, 1, 2, 3] sum to 3.178.

     Per the original test note, zeros have 1 added before the natural log
     is taken.
     NOTE(review): 3.178 is ln(24) = ln(1*1*2*3*4) rounded to 3 places,
     which suggests 1 is added to every value, not only zeros - confirm
     against the transform.
     """
     frame = pd.DataFrame({
         'TDLINX_STORE_CD': ['A1234'] * 5,
         'MASTER_PKG_SKU_CD': ['1234'] * 5,
         'L90_TY_QTY': [0, 0, 1, 2, 3],
     })
     rate = ratings(frame)
     rate.natLog_rateTrans()
     total = sum(rate.rating)
     self.assertEqual(round(total, 3), 3.178)
Beispiel #5
0
 def test_percStore(self):
     """Check that ``percStore_rateTrans`` yields each row's share of the store.

     All five rows belong to one store and the quantities sum to 100, so
     the expected ratings are the quantities divided by 100.
     """
     frame = pd.DataFrame({
         'TDLINX_STORE_CD': ['A1234'] * 5,
         'MASTER_PKG_SKU_CD': ['1234'] * 5,
         'L90_TY_QTY': [20, 50, 15, 10, 5],
     })
     rate = ratings(frame)
     rate.percStore_rateTrans()
     expected = np.array([0.20, 0.50, 0.15, 0.10, 0.05])
     # The ratings are computed floating-point fractions, so compare with a
     # tolerance: exact ``==`` can fail on a last-bit rounding difference
     # that depends on how the share is calculated.
     close = np.isclose(rate.rating, expected)
     self.assertEqual(sum(close), len(expected))
Beispiel #6
0
    def test_binary(self):
        """Check ``binary_rateTrans`` with default and custom arguments.

        The same quantities are used for both cases; only the keyword
        arguments and the expected ratings differ.
        """
        quantities = [-1, -2, 0, 4, 5]
        cases = [
            # Defaults: values are converted to 0,1 based on a 0 threshold
            ({}, [0, 0, 0, 1, 1]),
            # Values are converted to -2,2 based on a 2 threshold
            ({'p': 2, 'n': -2, 'thresh': 2}, [-2, -2, -2, 2, 2]),
        ]
        for kwargs, expected in cases:
            frame = pd.DataFrame({
                'TDLINX_STORE_CD': ['A1234'] * 5,
                'MASTER_PKG_SKU_CD': ['1234'] * 5,
                'L90_TY_QTY': quantities,
            })
            rate = ratings(frame)
            rate.binary_rateTrans(**kwargs)
            matches = rate.rating == np.array(expected)
            self.assertEqual(sum(matches), len(expected))
Beispiel #7
0
    def test_lte(self):
        """Check ``lte_quantTrans`` with default and custom threshold/value.

        Each case is (input quantities, keyword arguments, expected
        ``rate.quantity``) for a single store and a single SKU.
        """
        cases = [
            # Defaults: negatives are converted and assigned to zero
            ([0, -1, 1, -0.1, -10], {}, [0, 0, 1, 0, 0]),
            # Threshold varied from the default, replacement value set to -1
            ([1, -1, 10, -0.1, -10],
             {'thresh': 1, 'value': -1},
             [-1, -1, 10, -1, -1]),
        ]
        for quantities, kwargs, expected in cases:
            frame = pd.DataFrame({
                'TDLINX_STORE_CD': ['A1234'] * 5,
                'MASTER_PKG_SKU_CD': ['1234'] * 5,
                'L90_TY_QTY': quantities,
            })
            rate = ratings(frame)
            rate.lte_quantTrans(**kwargs)
            matches = rate.quantity == np.array(expected)
            self.assertEqual(sum(matches), len(expected))
import inputdata
import ratings

# Build a ratings object from the raw scores and print its recommendations.
data = inputdata.raw_scores
r = ratings.ratings(data)
result = r.recommendations()
# Use the call form of print: the bare `print result` statement is a
# SyntaxError on Python 3, while print(result) works on Python 2 and 3.
print(result)
Beispiel #9
0
# Keep only the products whose STOCK_TYPE_CD is 'S' or 'D'.
# (prod_info is defined earlier in the script, above this excerpt.)
prod_info = prod_info[prod_info['STOCK_TYPE_CD'].isin(['S', 'D'])]
# Distinct master SKU codes, converted to strings.
master_sku = list(prod_info.MASTER_SKU_CD.unique())
master_sku = [str(sku) for sku in master_sku]
# Distinct master package codes, left in their original type.
master_pkg = list(prod_info.MASTER_PKG_CD.unique())

#Load depletions data
# use_pandas=True asks civis for a pandas object, which the row filtering
# and column drop below rely on.
data = civis.io.read_civis(table="cbi.IL_AL_AK_OFF_L90",
                           database="Constellation Brands",
                           use_pandas=True)

# Market codes to process; each is matched against the 'mkt_cd' column.
mkts = ['AL', 'AK', 'IL']

for m in mkts:

    # Build a ratings object from this market's rows only; 'mkt_cd' is
    # dropped after filtering because it is constant within the subset.
    data_ratings = ratings(data[data['mkt_cd'] == m].drop('mkt_cd', axis=1),
                           quantCol='l90_ty_qty',
                           storeCol='tdlinx_store_cd',
                           productCol='master_pkg_sku_cd')

    #Data preprocessing
    data_ratings.lte_quantTrans()  #Remove negatives
    data_ratings.winsorize_quantTrans()  #Winsorize
    data_ratings.natLog_rateTrans()  #Convert to natural log scale
    data_sparse = data_ratings.sparse_matrix()

    #Prediction
    # NOTE(review): presumably limits OpenBLAS to a single thread to avoid
    # oversubscription during model fitting - confirm.
    os.environ['OPENBLAS_NUM_THREADS'] = '1'
    # Candidate hyperparameter values. NOTE(review): neither param nor
    # data_sparse is consumed in the visible lines - presumably both feed a
    # grid search that follows; verify against the full script.
    param = {
        'alpha': [1, 10, 100],
        'factors': [10, 20, 40, 80],
        'regularization': [0.001, 0.1]
    }
# S3 object key and the local file name it is fetched to.
key = 'Rec_Eng/Product_info.csv'
path = 'Product_info.csv'
# NOTE(review): presumably downloads the S3 object `key` to local `path` -
# confirm against the cb_s3 helper.
cb_s3.pull_file_from_s3(key, path)
# The file is read as ISO-8859-1 (Latin-1) rather than pandas' default UTF-8.
prod_info = pd.read_csv(path, encoding="ISO-8859-1")
# Keep only the products whose STOCK_TYPE_CD is 'S' or 'D'.
prod_info = prod_info[prod_info['STOCK_TYPE_CD'].isin(['S', 'D'])]
# Distinct master SKU codes, converted to strings.
master_sku = list(prod_info.MASTER_SKU_CD.unique())
master_sku = [str(sku) for sku in master_sku]
# Distinct master package codes, left in their original type.
master_pkg = list(prod_info.MASTER_PKG_CD.unique())

#Load depletions data
data = civis.io.read_civis(table="cbi.IL_OFF_L90",
                           database="Constellation Brands",
                           use_pandas=True)

# Wrap the depletions table in a ratings object, naming the quantity,
# store and product columns explicitly.
il_off_data = ratings(data,
                      quantCol='l90_ty_qty',
                      storeCol='tdlinx_store_cd',
                      productCol='master_pkg_sku_cd')

#Data preprocessing
il_off_data.lte_quantTrans()  #Remove negatives
il_off_data.winsorize_quantTrans()  #Winsorize
il_off_data.natLog_rateTrans()  #Convert to natural log scale
il_sparse = il_off_data.sparse_matrix()

#Prediction
import os
# NOTE(review): presumably limits OpenBLAS to a single thread to avoid
# oversubscription during model fitting - confirm.
os.environ['OPENBLAS_NUM_THREADS'] = '1'
# Hyperparameter grid handed to the grid search that follows.
param = {'alpha': [1, 100], 'factors': [10, 80], 'regularization': [0.001]}
d_test, opt_model, pred = implicit2.grid_search(il_sparse,
                                                param,
                                                itera=1,