コード例 #1
0
ファイル: hermes_run_script.py プロジェクト: fototo/hermes
    def run_cb_predictions(self):
        """Run every configured content-based algorithm for each
        (content vector, user vector) pair and save the predictions.

        For each combination the content-vector and training-ratings RDDs
        are loaded from HDFS and repartitioned; an algorithm is only run
        when its prediction file does not already exist on disk, so the
        method is safe to re-run after a partial failure.
        """
        for cv in self.content_vector_types:
            content_path = self.directory + self.data_name + '_cv_' + cv + '.pkl'
            content_vect = sl.load_from_hadoop(content_path, self.sc).repartition(self.num_partitions)

            for uv in self.user_vector_types:
                train_ratings_loc = self.directory + self.data_name + '_uv_train_' + uv + '.pkl'
                train_ratings = sl.load_from_hadoop(train_ratings_loc, self.sc).repartition(self.num_partitions)

                for cb_pred in self.cb_predictions:
                    pred_save_loc = self.directory + self.data_name + '_predictions_' + uv + '_' + cv + '_' + cb_pred + '.pkl'
                    print(pred_save_loc)
                    # Skip algorithms whose output was already saved.
                    if not os.path.isdir(pred_save_loc):
                        print('Running ' + cb_pred + ' for user vector ' + uv + ' and content vector ' + cv)
                        if cb_pred == 'cb_vect':
                            predictions = content_based.predict(
                                train_ratings, content_vect, num_partitions=self.num_partitions)
                        elif cb_pred == 'cb_kmeans_100':
                            predictions = content_based_kmeans.predict(
                                train_ratings, content_vect, num_predictions=100, num_partitions=self.num_partitions)
                        elif cb_pred == 'cb_kmeans_1000':
                            predictions = content_based_kmeans.predict(
                                train_ratings, content_vect, num_predictions=1000, num_partitions=self.num_partitions)
                        else:
                            # Unknown algorithm name: abandon the remaining
                            # cb_predictions for this (uv, cv) pair — matches the
                            # original control flow (break, not continue).
                            break
                        # Single save call instead of one duplicated per branch.
                        sl.save_to_hadoop(predictions, pred_save_loc)
        print('All CB predictions saved')
コード例 #2
0
ファイル: hermes_run_script.py プロジェクト: agude/hermes
    def run_cb_predictions(self):
        """Run all configured content-based prediction algorithms.

        Iterates over every (content vector, user vector, algorithm)
        combination, loading the needed RDDs from HDFS.  A combination is
        skipped when its prediction file already exists, making the run
        idempotent across restarts.
        """
        for cv in self.content_vector_types:
            content_path = self.directory + self.data_name + "_cv_" + cv + ".pkl"
            content_vect = sl.load_from_hadoop(content_path, self.sc).repartition(self.num_partitions)

            for uv in self.user_vector_types:
                train_ratings_loc = self.directory + self.data_name + "_uv_train_" + uv + ".pkl"
                train_ratings = sl.load_from_hadoop(train_ratings_loc, self.sc).repartition(self.num_partitions)

                for cb_pred in self.cb_predictions:
                    pred_save_loc = (
                        self.directory + self.data_name + "_predictions_" + uv + "_" + cv + "_" + cb_pred + ".pkl"
                    )
                    print(pred_save_loc)
                    # Only compute predictions that have not been saved yet.
                    if not os.path.isdir(pred_save_loc):
                        print("Running " + cb_pred + " for user vector " + uv + " and content vector " + cv)
                        if cb_pred == "cb_vect":
                            predictions = content_based.predict(
                                train_ratings, content_vect, num_partitions=self.num_partitions
                            )
                        elif cb_pred == "cb_kmeans_100":
                            predictions = content_based_kmeans.predict(
                                train_ratings, content_vect, num_predictions=100, num_partitions=self.num_partitions
                            )
                        elif cb_pred == "cb_kmeans_1000":
                            predictions = content_based_kmeans.predict(
                                train_ratings, content_vect, num_predictions=1000, num_partitions=self.num_partitions
                            )
                        else:
                            # Unknown algorithm: stop processing the remaining
                            # cb_predictions for this pair (original used break).
                            break
                        # One shared save call replaces the per-branch copies.
                        sl.save_to_hadoop(predictions, pred_save_loc)
        print("All CB predictions saved")
コード例 #3
0
ファイル: hermes_run_script.py プロジェクト: bethke/hermes
    def run_single_prediction(self, user_vector, content_vector, alg_type):
        """Run one recommendation algorithm and save its predictions to HDFS.

        Args:
            user_vector: name of the user-vector type; selects which
                training-ratings RDD is loaded.
            content_vector: name of the content-vector type, or a falsy
                value.  When truthy, alg_type is expected to be a
                content-based algorithm; otherwise a collaborative-filtering
                one.
            alg_type: algorithm identifier.  Unrecognized values are
                silently ignored — nothing is computed or saved.
        """
        train_ratings_loc = self.directory + self.data_name + '_uv_train_' + user_vector + '.pkl'
        train_ratings = sl.load_from_hadoop(train_ratings_loc, self.sc).repartition(self.num_partitions)

        if content_vector:
            content_path = self.directory + self.data_name + '_cv_' + content_vector + '.pkl'
            content_vect = sl.load_from_hadoop(content_path, self.sc).repartition(self.num_partitions)
            print('Running ' + alg_type + ' for user vector ' + user_vector + ' and content vector ' + content_vector)

            pred_save_loc = self.directory + self.data_name + '_predictions_' + user_vector + '_' + content_vector + '_' + alg_type + '.pkl'
            print(pred_save_loc)

            # Dispatch table removes the repeated predict/save pattern; the
            # lambdas defer the expensive computation until an algorithm is
            # actually selected.
            cb_algorithms = {
                'cb_vect': lambda: content_based.predict(
                    train_ratings, content_vect, num_partitions=self.num_partitions),
                'cb_kmeans_100': lambda: content_based_kmeans.predict(
                    train_ratings, content_vect, num_predictions=100, num_partitions=self.num_partitions),
                'cb_kmeans_1000': lambda: content_based_kmeans.predict(
                    train_ratings, content_vect, num_predictions=1000, num_partitions=self.num_partitions),
            }
            if alg_type in cb_algorithms:
                sl.save_to_hadoop(cb_algorithms[alg_type](), pred_save_loc)

        else:
            print('Running ' + alg_type + ' for user vector ' + user_vector)

            pred_save_loc = self.directory + self.data_name + '_predictions_' + user_vector + '_' + alg_type + '.pkl'
            print(pred_save_loc)

            cf_algorithms = {
                'cf_mllib': lambda: cf.calc_cf_mllib(train_ratings, num_partitions=self.num_partitions),
                'cf_item': lambda: cf.calc_item_item_cf(train_ratings, num_partitions=self.num_partitions),
                'cf_user': lambda: cf.calc_user_user_cf2(train_ratings, num_partitions=self.num_partitions),
                'cf_bayes_map': lambda: cf.calc_naive_bayes_map(train_ratings, self.sc),
                'cf_bayes_mse': lambda: cf.calc_naive_bayes_mse(train_ratings, self.sc),
                'cf_bayes_mae': lambda: cf.calc_naive_bayes_mae(train_ratings, self.sc),
                'cf_random': lambda: random_recommender.predict(train_ratings, self.sc),
            }
            if alg_type in cf_algorithms:
                sl.save_to_hadoop(cf_algorithms[alg_type](), pred_save_loc)
コード例 #4
0
    def run_cb_predictions(self):
        """Compute and save predictions for every content-based algorithm.

        Loops over each (content vector, user vector, algorithm) triple,
        loading RDDs from HDFS and repartitioning them.  Existing
        prediction files are left untouched so interrupted runs can be
        resumed.
        """
        for cv in self.content_vector_types:
            content_path = self.directory + self.data_name + '_cv_' + cv + '.pkl'
            content_vect = sl.load_from_hadoop(
                content_path, self.sc).repartition(self.num_partitions)

            for uv in self.user_vector_types:
                train_ratings_loc = self.directory + self.data_name + '_uv_train_' + uv + '.pkl'
                train_ratings = sl.load_from_hadoop(
                    train_ratings_loc, self.sc).repartition(self.num_partitions)

                for cb_pred in self.cb_predictions:
                    pred_save_loc = self.directory + self.data_name + '_predictions_' + uv + '_' + cv + '_' + cb_pred + '.pkl'
                    print(pred_save_loc)
                    # Idempotency guard: skip already-saved predictions.
                    if not os.path.isdir(pred_save_loc):
                        print('Running ' + cb_pred + ' for user vector ' + uv + ' and content vector ' + cv)
                        if cb_pred == 'cb_vect':
                            predictions = content_based.predict(
                                train_ratings,
                                content_vect,
                                num_partitions=self.num_partitions)
                        elif cb_pred == 'cb_kmeans_100':
                            predictions = content_based_kmeans.predict(
                                train_ratings,
                                content_vect,
                                num_predictions=100,
                                num_partitions=self.num_partitions)
                        elif cb_pred == 'cb_kmeans_1000':
                            predictions = content_based_kmeans.predict(
                                train_ratings,
                                content_vect,
                                num_predictions=1000,
                                num_partitions=self.num_partitions)
                        else:
                            # Unknown name: drop out of the cb_predictions loop
                            # for this pair, as the original code did.
                            break
                        # Save hoisted out of the branches (was triplicated).
                        sl.save_to_hadoop(predictions, pred_save_loc)
        print('All CB predictions saved')
コード例 #5
0
    def run_single_prediction(self, user_vector, content_vector, alg_type):
        """Run a single recommendation algorithm and persist its output.

        Args:
            user_vector: user-vector type name used to locate the
                training-ratings RDD on HDFS.
            content_vector: content-vector type name, or a falsy value.
                Truthy selects the content-based branch, falsy the
                collaborative-filtering branch.
            alg_type: algorithm identifier; unknown identifiers result in
                no computation and no save (silent no-op, as before).
        """
        train_ratings_loc = self.directory + self.data_name + '_uv_train_' + user_vector + '.pkl'
        train_ratings = sl.load_from_hadoop(
            train_ratings_loc, self.sc).repartition(self.num_partitions)

        if content_vector:
            content_path = self.directory + self.data_name + '_cv_' + content_vector + '.pkl'
            content_vect = sl.load_from_hadoop(
                content_path, self.sc).repartition(self.num_partitions)
            print('Running ' + alg_type + ' for user vector ' + user_vector + ' and content vector ' + content_vector)

            pred_save_loc = self.directory + self.data_name + '_predictions_' + user_vector + '_' + content_vector + '_' + alg_type + '.pkl'
            print(pred_save_loc)

            # Lazy dispatch table: predict is only invoked for the selected
            # algorithm, and the save call is written once.
            cb_runners = {
                'cb_vect': lambda: content_based.predict(
                    train_ratings,
                    content_vect,
                    num_partitions=self.num_partitions),
                'cb_kmeans_100': lambda: content_based_kmeans.predict(
                    train_ratings,
                    content_vect,
                    num_predictions=100,
                    num_partitions=self.num_partitions),
                'cb_kmeans_1000': lambda: content_based_kmeans.predict(
                    train_ratings,
                    content_vect,
                    num_predictions=1000,
                    num_partitions=self.num_partitions),
            }
            if alg_type in cb_runners:
                sl.save_to_hadoop(cb_runners[alg_type](), pred_save_loc)

        else:
            print('Running ' + alg_type + ' for user vector ' + user_vector)

            pred_save_loc = self.directory + self.data_name + '_predictions_' + user_vector + '_' + alg_type + '.pkl'
            print(pred_save_loc)

            cf_runners = {
                'cf_mllib': lambda: cf.calc_cf_mllib(
                    train_ratings, num_partitions=self.num_partitions),
                'cf_item': lambda: cf.calc_item_item_cf(
                    train_ratings, num_partitions=self.num_partitions),
                'cf_user': lambda: cf.calc_user_user_cf2(
                    train_ratings, num_partitions=self.num_partitions),
                'cf_bayes_map': lambda: cf.calc_naive_bayes_map(train_ratings, self.sc),
                'cf_bayes_mse': lambda: cf.calc_naive_bayes_mse(train_ratings, self.sc),
                'cf_bayes_mae': lambda: cf.calc_naive_bayes_mae(train_ratings, self.sc),
                'cf_random': lambda: random_recommender.predict(train_ratings, self.sc),
            }
            if alg_type in cf_runners:
                sl.save_to_hadoop(cf_runners[alg_type](), pred_save_loc)
コード例 #6
0
ファイル: hermes_run_script.py プロジェクト: agude/hermes
    def run_single_prediction(self, user_vector, content_vector, alg_type):
        """Run one algorithm for one user/content vector pair and save it.

        Args:
            user_vector: user-vector type; picks the training-ratings RDD.
            content_vector: content-vector type, or a falsy value.  When
                truthy a content-based algorithm is run; otherwise a
                collaborative-filtering one.
            alg_type: algorithm identifier.  Identifiers not in the
                dispatch tables are ignored without error (original
                behavior: unmatched elif chain fell through silently).
        """
        train_ratings_loc = self.directory + self.data_name + "_uv_train_" + user_vector + ".pkl"
        train_ratings = sl.load_from_hadoop(train_ratings_loc, self.sc).repartition(self.num_partitions)

        if content_vector:
            content_path = self.directory + self.data_name + "_cv_" + content_vector + ".pkl"
            content_vect = sl.load_from_hadoop(content_path, self.sc).repartition(self.num_partitions)
            print("Running " + alg_type + " for user vector " + user_vector + " and content vector " + content_vector)

            pred_save_loc = (
                self.directory
                + self.data_name
                + "_predictions_"
                + user_vector
                + "_"
                + content_vector
                + "_"
                + alg_type
                + ".pkl"
            )
            print(pred_save_loc)

            # Dispatch through a dict of thunks: one save call instead of
            # a duplicated predict/save pair per branch.
            cb_dispatch = {
                "cb_vect": lambda: content_based.predict(
                    train_ratings, content_vect, num_partitions=self.num_partitions
                ),
                "cb_kmeans_100": lambda: content_based_kmeans.predict(
                    train_ratings, content_vect, num_predictions=100, num_partitions=self.num_partitions
                ),
                "cb_kmeans_1000": lambda: content_based_kmeans.predict(
                    train_ratings, content_vect, num_predictions=1000, num_partitions=self.num_partitions
                ),
            }
            if alg_type in cb_dispatch:
                sl.save_to_hadoop(cb_dispatch[alg_type](), pred_save_loc)

        else:
            print("Running " + alg_type + " for user vector " + user_vector)

            pred_save_loc = self.directory + self.data_name + "_predictions_" + user_vector + "_" + alg_type + ".pkl"
            print(pred_save_loc)

            cf_dispatch = {
                "cf_mllib": lambda: cf.calc_cf_mllib(train_ratings, num_partitions=self.num_partitions),
                "cf_item": lambda: cf.calc_item_item_cf(train_ratings, num_partitions=self.num_partitions),
                "cf_user": lambda: cf.calc_user_user_cf2(train_ratings, num_partitions=self.num_partitions),
                "cf_bayes_map": lambda: cf.calc_naive_bayes_map(train_ratings, self.sc),
                "cf_bayes_mse": lambda: cf.calc_naive_bayes_mse(train_ratings, self.sc),
                "cf_bayes_mae": lambda: cf.calc_naive_bayes_mae(train_ratings, self.sc),
                "cf_random": lambda: random_recommender.predict(train_ratings, self.sc),
            }
            if alg_type in cf_dispatch:
                sl.save_to_hadoop(cf_dispatch[alg_type](), pred_save_loc)