def run_model(model_name, dataset_name, test_size=0.3, clean=False):
    """Fit the recommender called ``model_name`` on a dataset and test it.

    The train/test split is obtained from ``utils.ModelManager`` when a saved
    copy can be loaded; otherwise the ratings are loaded with ``DataSet``,
    split, and saved through the manager for later runs.  The chosen model is
    then fitted on the train set, sample recommendations are produced via
    ``recommend_test`` and ``model.test`` is run on the test set.

    Args:
        model_name: One of ``'UserCF'``, ``'ItemCF'``, ``'Random'``,
            ``'MostPopular'``, ``'UserCF-IIF'`` or ``'ItemCF-IUF'``.
        dataset_name: Dataset identifier, passed to ``utils.ModelManager``
            and ``DataSet.load_dataset``.
        test_size: Passed to ``utils.ModelManager`` and
            ``DataSet.train_test_split``.
        clean: Passed unchanged to ``ModelManager.clean_workspace``.  Set it
            to True when changing ``test_size`` or to retrain the model.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.  The
            check runs before any data is loaded or any file is touched.
    """
    # Factories are lambdas so that only the selected model class is looked
    # up and constructed, exactly like the if/elif chain this replaces.
    model_factories = {
        'UserCF': lambda: UserBasedCF(),
        'ItemCF': lambda: ItemBasedCF(),
        'Random': lambda: RandomPredict(),
        'MostPopular': lambda: MostPopular(),
        'UserCF-IIF': lambda: UserBasedCF(use_iif_similarity=True),
        'ItemCF-IUF': lambda: ItemBasedCF(use_iuf_similarity=True),
    }
    # Fail fast: reject an unknown name before the (potentially slow) dataset
    # load/split and before the workspace is modified.
    if model_name not in model_factories:
        raise ValueError('No model named %s' % (model_name,))

    print('*' * 70)
    print('\tThis is %s model trained on %s with test_size = %.2f'
          % (model_name, dataset_name, test_size))
    print('*' * 70 + '\n')

    model_manager = utils.ModelManager(dataset_name, test_size)
    try:
        trainset = model_manager.load_model('trainset')
        testset = model_manager.load_model('testset')
    except OSError:
        # No saved split could be loaded: build one and save it.
        ratings = DataSet.load_dataset(name=dataset_name)
        trainset, testset = DataSet.train_test_split(ratings, test_size=test_size)
        model_manager.save_model(trainset, 'trainset')
        model_manager.save_model(testset, 'testset')

    # Do you want to clean the workspace and retrain the model again?
    # If you want to change test_size or retrain the model, pass clean=True.
    # NOTE(review): the saved split above is loaded *before* this call, so a
    # clean run presumably still uses the previously saved split -- confirm
    # against ModelManager.clean_workspace that this ordering is intended.
    model_manager.clean_workspace(clean)

    model = model_factories[model_name]()
    model.fit(trainset)
    # Show recommendations for a fixed handful of sample user ids.
    recommend_test(model, [1, 100, 233, 666, 888])
    model.test(testset)
count = cur.execute( "select `id` from `book` where `id` not in(" "select distinct `book_id` from `read_record`) " "and `id` not in(select distinct `book_id` from `buy_record`)") result = cur.fetchmany(count) all_book_not_been_read = [] for row in result: all_book_not_been_read.append(row[0]) cur.close() conn.commit() conn.close() except Exception, e: print Exception, ':', e preprocessed_dataset = data_preprocessing.preprocess_data(dataset) mp = MostPopular(preprocessed_dataset) mp.calc_item_popularity() mp_by_ratio = MPByCategoryRatio(preprocessed_dataset, item_category_map) mp_by_ratio.calc_item_popularity() user_cf = HieraKmeansUserCF(preprocessed_dataset, item_category_map, n_sample, max_iter) user_cf.calc_user_sim() mutex.acquire() PREPROCESSED_DATASET = preprocessed_dataset ALL_BOOK_NOT_BEEN_READ = all_book_not_been_read MP = mp MP_BY_RATIO = mp_by_ratio USER_CF = user_cf mutex.release() print 'update complete'