def factorization(model_path='models/simple_factorization',
                  simple=True,
                  use_item_data=True,
                  item_data_output='data/model_data/item_data.csv',
                  model_params=None):
    """Build and save a ranking factorization recommender.

    Calls ``get.model_data(**data_params)``, loads the CSV found at
    ``data_params['output_path']``, trains the recommender and hands it to
    ``save_model``.

    Args:
        model_path: Destination passed to ``save_model``.
        simple: When true, only the ``user_id`` and ``item_id`` columns of
            the loaded data are used for training.
        use_item_data: When true, ``get.item_data(item_data_output)`` is
            called and the CSV at ``item_data_output`` is passed to the
            recommender as ``item_data``.
        item_data_output: Path handed to ``get.item_data`` and then read
            back as the item side data.
        model_params: Extra keyword arguments forwarded to
            ``ranking_factorization_recommender.create``. ``None`` (the
            default) means ``{'nmf': False, 'verbose': False}``.

    Note:
        ``data_params`` is not a parameter of this function; it must be
        defined in the enclosing module scope.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if model_params is None:
        model_params = {'nmf': False, 'verbose': False}

    get.model_data(**data_params)
    mat = gl.SFrame.read_csv(data_params['output_path'])
    if simple:
        mat = mat[['user_id', 'item_id']]

    if use_item_data:
        get.item_data(item_data_output)
        item_data = gl.SFrame.read_csv(item_data_output)
        model = gl.recommender.ranking_factorization_recommender.create(
            mat, item_data=item_data, **model_params)
    else:
        model = gl.recommender.ranking_factorization_recommender.create(
            mat, **model_params)

    save_model(model_path, model)
    log.info('saved model as %s' % model_path)
def item_similarity(data,
                    model_path='models/item_similarity',
                    model_params=None):
    """Build and return an item similarity recommender.

    Calls ``get.model_data(**data_params)``, loads the CSV found at
    ``data_params['output_path']``, keeps the ``user_id`` / ``item_id``
    columns and trains the recommender on them.

    Args:
        data: Currently unused by the body.
        model_path: Currently unused by the body; the model is returned,
            not saved.
        model_params: Extra keyword arguments forwarded to
            ``item_similarity_recommender.create``. ``None`` (the default)
            means ``{'similarity_type': 'jaccard', 'verbose': False}``.

    Returns:
        The trained recommender model.

    Note:
        ``data_params`` is not a parameter of this function; it must be
        defined in the enclosing module scope.
        NOTE(review): ``data`` may have been meant to replace
        ``data_params``, and ``model_path`` suggests the model was meant to
        be saved -- confirm with the callers before changing either.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if model_params is None:
        model_params = {'similarity_type': 'jaccard', 'verbose': False}

    get.model_data(**data_params)
    mat = gl.SFrame.read_csv(data_params['output_path'])
    mat = mat[['user_id', 'item_id']]
    model = gl.recommender.item_similarity_recommender.create(
        mat, **model_params)
    return model
def main():
    """Train, save and spot-check an item similarity recommender.

    Three stages:

    1. Train an item similarity model on the ``user_id`` / ``item_id``
       pairs returned by ``get.model_data`` and save it to ``MODEL_PATH``.
    2. Produce recommendations for a fixed list of user ids via
       ``check_recs``.
    3. Split the ``user_beers`` data by user, retrain on the training part
       and evaluate precision/recall on the held-out part.

    Returns:
        A ``(recommends, pr)`` tuple: the ``check_recs`` output and the
        result of ``evaluate(..., metric='precision_recall')``.
    """
    # Stage 1: build and save the model.
    trades = get.model_data(
        sparse=4, outlier=10000,
        iso_rate=0, proposed_rate=0, traded_rate=1,
        trade_path=TRADE_PATH,
        trade_item_path=TRADE_ITEM_PATH,
        ftiso_path=FTISO_PATH)
    interactions = gl.SFrame(trades[['user_id', 'item_id']])
    trained = gl.recommender.item_similarity_recommender.create(interactions)
    save_model(MODEL_PATH, trained)

    # Stage 2: recommendations for a hand-picked set of users.
    ft, user_beers, beers = get.rec_data(
        FTISO_PATH, TRADE_PATH, TRADE_ITEM_PATH, BEERS_PATH)
    sample_users = [3381, 14239, 6601, 8958, 1440, 880, 57]
    recommends = check_recs(sample_users, trained, ft, user_beers, beers)

    # Stage 3: precision/recall of a model retrained on a per-user split.
    train, test = gl.recommender.util.random_split_by_user(
        gl.SFrame(user_beers))
    holdout_model = gl.recommender.item_similarity_recommender.create(train)
    pr = holdout_model.evaluate(test, metric='precision_recall')

    return recommends, pr
def load_rec_data(ftiso_path='../data/raw_data/ftiso.csv',
                  trade_path='../data/raw_data/trades.csv',
                  trade_item_path='../data/raw_data/trade_items.csv'):
    """Load the frames needed to inspect recommendations.

    The three path arguments are now honoured; previously they were
    accepted but ignored in favour of hard-coded literals. Those literals
    are kept as the defaults, so existing behaviour is preserved when the
    function is called with the same paths or with no arguments.

    Args:
        ftiso_path: CSV with the FT/ISO listings (read without a header
            row; column names are supplied here).
        trade_path: Trades CSV forwarded to ``get.model_data``.
        trade_item_path: Trade items CSV forwarded to ``get.model_data``.

    Returns:
        A ``(ft, user_beers, beers)`` tuple of pandas DataFrames:
        the rows of the FT/ISO data whose ``Type`` is ``'ft'``, the
        ``user_id`` / ``item_id`` columns of
        ``'../data/model_data/all_user_beers'``, and the contents of
        ``'../data/raw_data/beers.csv'``.
    """
    ftiso = pd.read_csv(
        ftiso_path,
        header=None,
        names=['ID', 'Beer ID', 'Quantity', 'Cellar Quantity', 'User ID',
               'Type', 'Accessible List', 'Created', 'Modified'])
    ft = ftiso[ftiso['Type'] == 'ft']

    # The return value is not used here. The call is kept because it
    # presumably (re)generates the all_user_beers file read below --
    # NOTE(review): confirm against get.model_data's default output path.
    get.model_data(sparse=0, outlier=10000000,
                   iso_rate=1, proposed_rate=1, traded_rate=1,
                   trade_path=trade_path,
                   trade_item_path=trade_item_path,
                   ftiso_path=ftiso_path)

    user_beers = pd.read_csv(
        '../data/model_data/all_user_beers')[['user_id', 'item_id']]
    beers = pd.read_csv('../data/raw_data/beers.csv')
    return ft, user_beers, beers
def _load_data():
    """Create the CSVs for training the models and making recommendations.

    Calls ``get.model_data`` once per model (item similarity, nmf,
    popularity). Every call uses the same rates and input paths; only
    ``sparse``, ``outlier`` and ``output_path`` differ.
    """
    # Arguments shared by every export.
    shared = dict(
        iso_rate=0, proposed_rate=0, traded_rate=1,
        trade_path=TRADE_PATH,
        trade_item_path=TRADE_ITEM_PATH,
        ftiso_path=FTISO_PATH)

    # (sparse, outlier, output_path), in the original call order.
    exports = [
        (4, 10000, IS_DF_PATH),   # item similarity model csv
        (3, 500, NMF_DF_PATH),    # nmf model csv
        (0, 500, POP_DF_PATH),    # popularity model csv
    ]

    for sparse, outlier, output_path in exports:
        get.model_data(sparse=sparse, outlier=outlier,
                       output_path=output_path, **shared)
def main():
    """Train an item similarity recommender and save it to ``MODEL_PATH``.

    The model is trained on the ``user_id`` / ``item_id`` columns of the
    data returned by ``get.model_data``. No item side data is passed to
    the recommender here.
    """
    # NOTE(review): the original carried an "if exists thing" reminder at
    # this point -- presumably about reusing already-generated data; confirm.
    trades = get.model_data(
        sparse=4, outlier=10000,
        iso_rate=0, proposed_rate=0, traded_rate=1,
        trade_path=TRADE_PATH,
        trade_item_path=TRADE_ITEM_PATH,
        ftiso_path=FTISO_PATH)

    interactions = gl.SFrame(trades[['user_id', 'item_id']])
    save_model(
        MODEL_PATH,
        gl.recommender.item_similarity_recommender.create(interactions))
def build_nmf_model():
    """Build a ranking factorization recommender with item side data.

    Trains on the ``user_id`` / ``item_id`` columns of the data returned
    by ``get.model_data`` and passes the output of ``get.item_data()`` as
    ``item_data``.

    Returns:
        The trained recommender model.

    NOTE(review): ``nmf=False`` is passed even though the function name
    says "nmf" -- confirm this is intended.
    """
    trades = get.model_data(
        sparse=4, outlier=500,
        iso_rate=0, proposed_rate=0, traded_rate=1,
        trade_path=TRADE_PATH,
        trade_item_path=TRADE_ITEM_PATH,
        ftiso_path=FTISO_PATH)
    interactions = gl.SFrame(trades[['user_id', 'item_id']])
    side_data = gl.SFrame(get.item_data())

    return gl.recommender.ranking_factorization_recommender.create(
        interactions, item_data=side_data, nmf=False)
def build_nmf_model():
    """Build a ranking factorization recommender and sample its output.

    Trains on the ``user_id`` / ``item_id`` columns of the data returned
    by ``get.model_data`` with the output of ``get.item_data()`` as item
    side data, then asks ``check.recs`` for recommendations for a fixed
    list of user ids.

    Returns:
        A ``(model, recommends)`` tuple.

    NOTE(review): ``nmf=False`` is passed even though the function name
    says "nmf" -- confirm this is intended.
    NOTE: a precision/recall evaluation step used to follow the
    recommendation check but was commented out; it is not performed.
    """
    # Train the model.
    trades = get.model_data(
        sparse=4, outlier=500,
        iso_rate=0, proposed_rate=0, traded_rate=1,
        trade_path=TRADE_PATH,
        trade_item_path=TRADE_ITEM_PATH,
        ftiso_path=FTISO_PATH)
    interactions = gl.SFrame(trades[['user_id', 'item_id']])
    side_data = gl.SFrame(get.item_data())
    model = gl.recommender.ranking_factorization_recommender.create(
        interactions, item_data=side_data, nmf=False)

    # Recommendations for a hand-picked set of users.
    sample_users = [3381, 14239, 6601, 8958, 1440, 880, 57]
    ft, user_beers, beers = check.load_rec_data()
    recommends = check.recs(sample_users, model, ft, user_beers, beers)

    return model, recommends
def main():
    """Generate the model data and load the resulting CSV as an SFrame.

    ``data_params`` is not defined in this function; it must come from the
    enclosing module scope.

    NOTE(review): the loaded frame is neither used nor returned within
    this function as visible here.
    """
    get.model_data(**data_params)
    csv_path = data_params['output_path']
    mat = gl.SFrame.read_csv(csv_path)