Example #1
 def __init__(self,
              model_dir: str,
              K=100,
              epochs=10,
              alpha=0.01,
              lamb=0.01,
              n_rec_movie=10,
              save_model=True):
     """
     Init LFM with K, T, alpha, lamb
     :param K: Latent Factor dimension
     :param epochs: epochs to go
     :param alpha: study rate
     :param lamb: regular params
     :param save_model: save model
     """
     print("LFM start...\n")
     self.K = K
     self.epochs = epochs
     self.alpha = alpha
     self.lamb = lamb
     self.n_rec_movie = n_rec_movie
     self.save_model = save_model
     self.users_set, self.items_set = set(), set()
     self.items_list = list()
     self.P, self.Q = None, None
     self.trainset = None
     self.testset = None
     self.user_average_score = dict()
     self.item_average_score = dict()
     self.item_popular, self.items_count = None, None
     self.model_name = 'K={}-epochs={}-alpha={}-lamb={}'.format(
         self.K, self.epochs, self.alpha, self.lamb)
     self.model_manager = utils.ModelManager(model_dir + self.model_name)
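
A minimal usage sketch for the constructor above, assuming the repo's LFM class is importable; the directory path and the fit() call are assumptions, not shown in this example:

# Hypothetical usage; 'model/lfm/' and fit() are assumptions.
lfm = LFM('model/lfm/', K=50, epochs=20, alpha=0.02, lamb=0.01)
lfm.fit(trainset)  # trainset as produced by DataSet.train_test_split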
Example #2
 def __init__(self,
              model_dir: str,
              k_sim_user=20,
              n_rec_movie=10,
              use_iif_similarity=False,
              save_model=True):
     """
     Init UserBasedCF with n_sim_user and n_rec_movie.
     :return: None
     """
     self.k_sim_user = k_sim_user
     self.n_rec_movie = n_rec_movie
     self.trainset = None
     self.save_model = save_model
     self.use_iif_similarity = use_iif_similarity
     self.item_average_score = None
     self.model_manager = utils.ModelManager(model_dir)
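
Likewise, a hedged usage sketch; the directory path is an assumption:

# Hypothetical usage of the constructor above.
usercf = UserBasedCF('model/usercf/', k_sim_user=20, n_rec_movie=10,
                     use_iif_similarity=False)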
Example #3
def run_model(model_name, dataset_name, test_size=0.3, clean=False):
    print('*' * 70)
    print('\tThis is %s model trained on %s with test_size = %.2f' %
          (model_name, dataset_name, test_size))
    print('*' * 70 + '\n')
    model_manager = utils.ModelManager(dataset_name, test_size)
    try:
        trainset = model_manager.load_pkl('trainset')
        testset = model_manager.load_pkl('testset')
    except OSError:
        ratings = DataSet.load_dataset(name=dataset_name)
        trainset, testset = DataSet.train_test_split(ratings,
                                                     test_size=test_size)
        model_manager.save_pkl(trainset, 'trainset')
        model_manager.save_pkl(testset, 'testset')
    # Clean the workspace and retrain the model?
    # To change test_size or retrain the model, set clean=True.
    model_manager.clean_workspace(clean)
    if model_name == 'UserCF':
        model = UserBasedCF()
    elif model_name == 'ItemCF':
        model = ItemBasedCF()
    elif model_name == 'Random':
        model = RandomPredict()
    elif model_name == 'MostPopular':
        model = MostPopular()
    elif model_name == 'UserCF-IIF':
        model = UserBasedCF(use_iif_similarity=True)
    elif model_name == 'ItemCF-IUF':
        model = ItemBasedCF(use_iuf_similarity=True)
    elif model_name == 'LFM':
        # K, epochs, alpha, lamb, n_rec_movie
        model = LFM(10, 20, 0.1, 0.01, 10)
    else:
        raise ValueError('No model named ' + model_name)
    model.fit(trainset)
    recommend_test(model, [1, 100, 233, 666, 888])
    model.test(testset)
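
A sketch of how run_model might be invoked; the dataset name 'ml-100k' is an assumption (any name DataSet.load_dataset accepts would do):

# Hypothetical invocation; pass clean=True to wipe the workspace and retrain.
run_model('UserCF', 'ml-100k', test_size=0.3, clean=False)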
Example #4
    def __init__(self,
                 model_dir,
                 batch_size=8192,
                 epochs=100,
                 lr=1e-3,
                 dropout=0.5,
                 early_stop=10,
                 max_length=50,
                 overwrite=True,
                 **kwargs):
        self.model_manager = utils.ModelManager(model_dir)
        self.ckpt_path = os.path.join(self.model_manager.path_name, 'ckpt')
        self.batch_size = batch_size
        self.epochs = epochs
        self.lr = lr
        self.dropout = dropout
        self.early_stop = early_stop
        self.max_length = max_length
        self.user_list = load_np_array(CONFIG.user_list_file)
        self.item_list = load_np_array(CONFIG.movie_list_file)
        self.target_word2id = make_vocab_lookup(CONFIG.target_word_list,
                                                unk_token='UNK')
        self.description_word2id = make_vocab_lookup(
            CONFIG.description_word_list, unk_token='UNK')
        self.sentiment_word2id = make_vocab_lookup(
            CONFIG.sentiment_category_list)
        self.target_num = len(self.target_word2id)
        self.description_num = len(self.description_word2id)

        tf.reset_default_graph()
        self.model = TripleSentimentRating(self.target_num,
                                           self.description_num, **kwargs)
        self.sess = None
        self.saver = None
        self.global_step = tf.Variable(0, trainable=False)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.variable_scope("Optimizer"):
            params = tf.trainable_variables()
            gradients = tf.gradients(self.model.loss, params)
            clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5)
            optimizer = tf.train.AdamOptimizer(self.lr)
            # clipped_gradients, _ = tf.clip_by_global_norm(gradients, 0.5)
            # optimizer = tf.train.GradientDescentOptimizer(self.lr)
            with tf.control_dependencies(update_ops):
                self.train_op = optimizer.apply_gradients(
                    zip(clipped_gradients, params),
                    global_step=self.global_step)
        self.create_or_load_model(overwrite)

        def parse_profile(_id, _type='subject'):
            file = os.path.join(CONFIG.data_path, _type, str(_id), 'analysis',
                                'profile.json')
            targets = []
            descriptions = []
            sentiments = []
            freqs = []
            if not os.path.exists(file):
                print_with_time('file does not exist: %s' % file)
                return {
                    'target': targets,
                    'description': descriptions,
                    'sentiment': sentiments,
                    'freq': freqs,
                    'length': 0
                }
            profile = load_json_file(file)
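            # Expected profile.json shape, inferred from the nested loops
            # below (an assumption, not documented in this example):
            #   {target: {sentiment: {description: [sample, ...]}}}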
            for target, sentiment_description_sample in profile.items():
                for sentiment, description_sample in sentiment_description_sample.items():
                    for description, samples in description_sample.items():
                        targets.append(target)
                        descriptions.append(description)
                        sentiments.append(sentiment)
                        freqs.append(len(samples))
            targets = list(map(lambda x: self.target_word2id[x], targets))
            descriptions = list(
                map(lambda x: self.description_word2id[x], descriptions))
            sentiments = list(
                map(lambda x: self.sentiment_word2id[x], sentiments))
            length = len(freqs)
            return {
                'target': targets,
                'description': descriptions,
                'sentiment': sentiments,
                'freq': freqs,
                'length': length
            }

        print_with_time('initial user profiles')
        try:
            self.user_profiles = self.model_manager.load_json('user_profiles')
        except OSError:
            self.user_profiles = list(
                map(lambda x: parse_profile(x, 'user'), self.user_list))
            self.model_manager.save_json(self.user_profiles, 'user_profiles')
        print_with_time('initial movie profiles')
        try:
            self.movie_profiles = self.model_manager.load_json(
                'movie_profiles')
        except OSError:
            self.movie_profiles = list(
                map(lambda x: parse_profile(x, 'subject'), self.item_list))
            self.model_manager.save_json(self.movie_profiles, 'movie_profiles')
        print_with_time('profiles initialized')
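
The Optimizer scope in the example above follows the standard TF1 pattern of global-norm gradient clipping before apply_gradients. Below is a self-contained sketch of that pattern with a toy variable and loss (placeholders, not the model's):

import tensorflow as tf  # TF1-style API, matching the example above

# Toy variable and loss standing in for the model's parameters and loss.
w = tf.Variable([3.0, 4.0])
loss = tf.reduce_sum(tf.square(w))
params = tf.trainable_variables()
grads = tf.gradients(loss, params)
clipped, _ = tf.clip_by_global_norm(grads, 5)  # cap the global gradient norm at 5
train_op = tf.train.AdamOptimizer(1e-3).apply_gradients(zip(clipped, params))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)  # one clipped Adam update on the toy loss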