def __init__(self, model_dir: str, K=100, epochs=10, alpha=0.01, lamb=0.01, n_rec_movie=10, save_model=True):
    """Set up an LFM (latent factor model) recommender.

    :param model_dir: root directory for persisted model artifacts
    :param K: latent factor dimension
    :param epochs: number of training epochs
    :param alpha: learning rate
    :param lamb: regularization coefficient
    :param n_rec_movie: number of movies to recommend per user
    :param save_model: whether to persist the trained model
    """
    print("LFM start...\n")
    # Hyper-parameters.
    self.K = K
    self.epochs = epochs
    self.alpha = alpha
    self.lamb = lamb
    self.n_rec_movie = n_rec_movie
    self.save_model = save_model
    # State populated later during training/evaluation.
    self.users_set, self.items_set = set(), set()
    self.items_list = []
    self.P, self.Q = None, None          # latent factor matrices
    self.trainset, self.testset = None, None
    self.user_average_score = {}
    self.item_average_score = {}
    self.item_popular, self.items_count = None, None
    # Encode hyper-parameters into the model name so saved artifacts
    # for different configurations do not collide.
    self.model_name = f'K={self.K}-epochs={self.epochs}-alpha={self.alpha}-lamb={self.lamb}'
    self.model_manager = utils.ModelManager(model_dir + self.model_name)
def __init__(self, model_dir: str, k_sim_user=20, n_rec_movie=10, use_iif_similarity=False, save_model=True):
    """Set up a user-based collaborative-filtering recommender.

    :param model_dir: directory for persisted model artifacts
    :param k_sim_user: number of most-similar users to consider
    :param n_rec_movie: number of movies to recommend per user
    :param use_iif_similarity: weight similarity by inverse item frequency
    :param save_model: whether to persist the trained model
    :return: None
    """
    # Recommendation hyper-parameters.
    self.k_sim_user = k_sim_user
    self.n_rec_movie = n_rec_movie
    self.use_iif_similarity = use_iif_similarity
    self.save_model = save_model
    # Filled in by fit().
    self.trainset = None
    self.item_average_score = None
    self.model_manager = utils.ModelManager(model_dir)
def run_model(model_name, dataset_name, test_size=0.3, clean=False):
    """Train the named recommender on a dataset split and evaluate it.

    :param model_name: one of the keys in ``builders`` below
    :param dataset_name: dataset identifier understood by DataSet
    :param test_size: fraction of ratings held out for testing
    :param clean: wipe the workspace and retrain from scratch
    """
    banner = '*' * 70
    print(banner)
    print('\tThis is %s model trained on %s with test_size = %.2f' % (model_name, dataset_name, test_size))
    print(banner + '\n')
    model_manager = utils.ModelManager(dataset_name, test_size)
    # Reuse a cached train/test split when one exists; otherwise build
    # the split once and cache it for subsequent runs.
    try:
        trainset = model_manager.load_pkl('trainset')
        testset = model_manager.load_pkl('testset')
    except OSError:
        ratings = DataSet.load_dataset(name=dataset_name)
        trainset, testset = DataSet.train_test_split(ratings, test_size=test_size)
        model_manager.save_pkl(trainset, 'trainset')
        model_manager.save_pkl(testset, 'testset')
    # Pass clean=True to clear the workspace and retrain (e.g. after
    # changing test_size).
    model_manager.clean_workspace(clean)
    # Lazy constructors: only the selected model is instantiated.
    builders = {
        'UserCF': lambda: UserBasedCF(),
        'ItemCF': lambda: ItemBasedCF(),
        'Random': lambda: RandomPredict(),
        'MostPopular': lambda: MostPopular(),
        'UserCF-IIF': lambda: UserBasedCF(use_iif_similarity=True),
        'ItemCF-IUF': lambda: ItemBasedCF(use_iuf_similarity=True),
        # NOTE(review): positional args assumed to be
        # (K, epochs, alpha, lamb, n_rec_movie) — confirm against LFM.__init__.
        'LFM': lambda: LFM(10, 20, 0.1, 0.01, 10),
    }
    if model_name not in builders:
        raise ValueError('No model named ' + model_name)
    model = builders[model_name]()
    model.fit(trainset)
    recommend_test(model, [1, 100, 233, 666, 888])
    model.test(testset)
def __init__(self, model_dir, batch_size=8192, epochs=100, lr=1e-3, dropout=0.5, early_stop=10, max_length=50, overwrite=True, **kwargs):
    """Build the TF1 training graph and load (or create) user/movie profiles.

    :param model_dir: directory for checkpoints and cached profile JSON
    :param batch_size: training batch size
    :param epochs: maximum number of training epochs
    :param lr: Adam learning rate
    :param dropout: dropout keep/drop rate (semantics defined by the model)
    :param early_stop: early-stopping patience (epochs without improvement)
    :param max_length: maximum sequence length
    :param overwrite: passed to create_or_load_model; controls checkpoint reuse
    :param kwargs: forwarded verbatim to TripleSentimentRating
    """
    self.model_manager = utils.ModelManager(model_dir)
    self.ckpt_path = os.path.join(self.model_manager.path_name, 'ckpt')
    self.batch_size = batch_size
    self.epochs = epochs
    self.lr = lr
    self.dropout = dropout
    self.early_stop = early_stop
    self.max_length = max_length
    # ID lists and vocabulary lookups loaded from global CONFIG paths.
    self.user_list = load_np_array(CONFIG.user_list_file)
    self.item_list = load_np_array(CONFIG.movie_list_file)
    self.target_word2id = make_vocab_lookup(CONFIG.target_word_list, unk_token='UNK')
    self.description_word2id = make_vocab_lookup(
        CONFIG.description_word_list, unk_token='UNK')
    # No UNK token here: sentiment categories are a closed set.
    self.sentiment_word2id = make_vocab_lookup(
        CONFIG.sentiment_category_list)
    self.target_num = len(self.target_word2id)
    self.description_num = len(self.description_word2id)
    # TF1-style graph construction: reset the default graph before building.
    tf.reset_default_graph()
    self.model = TripleSentimentRating(self.target_num, self.description_num, **kwargs)
    self.sess = None   # created later by create_or_load_model
    self.saver = None
    self.global_step = tf.Variable(0, trainable=False)
    # UPDATE_OPS must run with the train op (e.g. batch-norm moving averages).
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.variable_scope("Optimizer"):
        params = tf.trainable_variables()
        gradients = tf.gradients(self.model.loss, params)
        # Clip global gradient norm to 5 to stabilize training.
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5)
        optimizer = tf.train.AdamOptimizer(self.lr)
        # clipped_gradients, _ = tf.clip_by_global_norm(gradients, 0.5)
        # optimizer = tf.train.GradientDescentOptimizer(self.lr)
        with tf.control_dependencies(update_ops):
            self.train_op = optimizer.apply_gradients(
                zip(clipped_gradients, params), global_step=self.global_step)
    self.create_or_load_model(overwrite)

    def parse_profile(_id, _type='subject'):
        # Read one profile.json and flatten its nested
        # target -> sentiment -> description -> samples structure into
        # parallel id lists; closure over self for the vocab lookups.
        file = os.path.join(CONFIG.data_path, _type, str(_id), 'analysis',
                            'profile.json')
        targets = []
        descriptions = []
        sentiments = []
        freqs = []
        if not os.path.exists(file):
            # Missing profile: warn and return an empty record.
            print_with_time('file not exists: %s' % file)
            return {
                'target': targets,
                'description': descriptions,
                'sentiment': sentiments,
                'freq': freqs,
                'length': 0
            }
        profile = load_json_file(file)
        for target, sentiment_description_sample in profile.items():
            for sentiment, description_sample in sentiment_description_sample.items(
            ):
                for description, samples in description_sample.items():
                    targets.append(target)
                    descriptions.append(description)
                    sentiments.append(sentiment)
                    freqs.append(len(samples))
        # Map words to vocabulary ids (raises KeyError on unknown
        # sentiment categories; target/description lookups include UNK).
        targets = list(map(lambda x: self.target_word2id[x], targets))
        descriptions = list(
            map(lambda x: self.description_word2id[x], descriptions))
        sentiments = list(
            map(lambda x: self.sentiment_word2id[x], sentiments))
        length = len(freqs)
        return {
            'target': targets,
            'description': descriptions,
            'sentiment': sentiments,
            'freq': freqs,
            'length': length
        }

    # Profiles are expensive to parse, so cache them as JSON and reload
    # on subsequent runs; OSError means no cache exists yet.
    print_with_time('initial user profiles')
    try:
        self.user_profiles = self.model_manager.load_json('user_profiles')
    except OSError:
        self.user_profiles = list(
            map(lambda x: parse_profile(x, 'user'), self.user_list))
        self.model_manager.save_json(self.user_profiles, 'user_profiles')
    print_with_time('initial movie profiles')
    try:
        self.movie_profiles = self.model_manager.load_json(
            'movie_profiles')
    except OSError:
        self.movie_profiles = list(
            map(lambda x: parse_profile(x, 'subject'), self.item_list))
        self.model_manager.save_json(self.movie_profiles, 'movie_profiles')
    print_with_time('profiles initialized')