コード例 #1
0
ファイル: main.py プロジェクト: LhWorld/MovieLens-Recommender
def run_model(model_name, dataset_name, test_size=0.3, clean=False):
    """Train the named recommender on a dataset and evaluate it.

    Prints a banner, obtains the train/test split through
    ``utils.ModelManager`` (building and saving it from the raw ratings when
    loading raises ``OSError``), builds the requested model, fits it on the
    training set, passes it to ``recommend_test`` and finally calls
    ``model.test`` on the test set.

    Args:
        model_name: One of 'UserCF', 'ItemCF', 'Random', 'MostPopular',
            'UserCF-IIF' or 'ItemCF-IUF'.
        dataset_name: Dataset identifier handed to ``utils.ModelManager`` and
            ``DataSet.load_dataset``.
        test_size: Value handed to ``utils.ModelManager`` and
            ``DataSet.train_test_split``.
        clean: Forwarded to ``model_manager.clean_workspace``. Set it to True
            when you change ``test_size`` or want the model retrained.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    banner = '*' * 70
    print(banner)
    print('\tThis is %s model trained on %s with test_size = %.2f' %
          (model_name, dataset_name, test_size))
    print(banner + '\n')

    model_manager = utils.ModelManager(dataset_name, test_size)
    try:
        trainset = model_manager.load_model('trainset')
        testset = model_manager.load_model('testset')
    except OSError:
        # No usable cached split: rebuild it from the ratings and cache it.
        ratings = DataSet.load_dataset(name=dataset_name)
        trainset, testset = DataSet.train_test_split(ratings,
                                                     test_size=test_size)
        model_manager.save_model(trainset, 'trainset')
        model_manager.save_model(testset, 'testset')

    # Do you want to clean the workspace and retrain the model again?
    # If you want to change test_size or retrain the model, pass clean=True.
    model_manager.clean_workspace(clean)

    # Factories are lambdas so that only the selected model is constructed.
    model_factories = {
        'UserCF': lambda: UserBasedCF(),
        'ItemCF': lambda: ItemBasedCF(),
        'Random': lambda: RandomPredict(),
        'MostPopular': lambda: MostPopular(),
        'UserCF-IIF': lambda: UserBasedCF(use_iif_similarity=True),
        'ItemCF-IUF': lambda: ItemBasedCF(use_iuf_similarity=True),
    }
    factory = model_factories.get(model_name)
    if factory is None:
        # %-format with a 1-tuple so non-string names still yield ValueError
        # (the original concatenation lacked a space and could TypeError).
        raise ValueError('No model named %s' % (model_name,))
    model = factory()

    model.fit(trainset)
    # Hard-coded sample ids handed to recommend_test (presumably user ids
    # -- confirm against recommend_test).
    recommend_test(model, [1, 100, 233, 666, 888])
    model.test(testset)
コード例 #2
0
        count = cur.execute(
            "select `id` from `book` where `id` not in("
            "select distinct `book_id` from `read_record`) "
            "and `id` not in(select distinct `book_id` from `buy_record`)")
        result = cur.fetchmany(count)
        all_book_not_been_read = []
        for row in result:
            all_book_not_been_read.append(row[0])
        cur.close()
        conn.commit()
        conn.close()
    except Exception, e:
        print Exception, ':', e

    preprocessed_dataset = data_preprocessing.preprocess_data(dataset)
    mp = MostPopular(preprocessed_dataset)
    mp.calc_item_popularity()
    mp_by_ratio = MPByCategoryRatio(preprocessed_dataset, item_category_map)
    mp_by_ratio.calc_item_popularity()
    user_cf = HieraKmeansUserCF(preprocessed_dataset, item_category_map,
                                n_sample, max_iter)
    user_cf.calc_user_sim()

    mutex.acquire()
    PREPROCESSED_DATASET = preprocessed_dataset
    ALL_BOOK_NOT_BEEN_READ = all_book_not_been_read
    MP = mp
    MP_BY_RATIO = mp_by_ratio
    USER_CF = user_cf
    mutex.release()
    print 'update complete'