Exemple #1
0
class TestReader(unittest.TestCase):
    """Unit tests for ``Reader.read`` and the ``read_text`` helper.

    All tests read the fixture file ``./tests/data.txt``.
    """

    def setUp(self):
        self.data_file = './tests/data.txt'
        self.reader = Reader()

    def test_raise(self):
        """An unsupported ``fmt`` value must raise ``ValueError``."""
        # assertRaises fails the test when nothing is raised; a bare
        # try/except around the call would pass silently in that case.
        with self.assertRaises(ValueError):
            self.reader.read(self.data_file, fmt='bla bla')

    def test_read_ui(self):
        """Check row count and sample values when reading with ``fmt='UI'``."""
        triplets = self.reader.read(self.data_file, fmt='UI')
        self.assertEqual(len(triplets), 30)
        self.assertEqual(triplets[0][1], '93')
        self.assertEqual(triplets[1][2], 1.0)

        # Same file, same format, but with id_inline enabled.
        triplets = self.reader.read(self.data_file, fmt='UI', id_inline=True)
        self.assertEqual(len(triplets), 40)

    def test_read_uir(self):
        """Check row count and sample values when reading with the default format."""
        triplet_data = self.reader.read(self.data_file)

        self.assertEqual(len(triplet_data), 10)
        self.assertEqual(triplet_data[4][2], 3)
        self.assertEqual(triplet_data[6][1], '478')
        self.assertEqual(triplet_data[8][0], '543')

    def test_filter(self):
        """Check how many rows survive each ``Reader`` filtering option."""
        reader = Reader(bin_threshold=4.0)
        self.assertEqual(len(reader.read(self.data_file)), 8)

        reader = Reader(min_user_freq=2)
        self.assertEqual(len(reader.read(self.data_file)), 0)

        reader = Reader(min_item_freq=2)
        self.assertEqual(len(reader.read(self.data_file)), 0)

        reader = Reader(user_set=['76'], item_set=['93'])
        self.assertEqual(len(reader.read(self.data_file)), 1)

        reader = Reader(user_set=['76', '768'])
        self.assertEqual(len(reader.read(self.data_file)), 2)

        reader = Reader(item_set=['93', '257', '795'])
        self.assertEqual(len(reader.read(self.data_file)), 3)

    def test_read_text(self):
        """Check ``read_text`` with no separator and with a tab separator."""
        self.assertEqual(len(read_text(self.data_file, sep=None)), 10)
        self.assertEqual(read_text(self.data_file, sep='\t')[1][0], '76')
Exemple #2
0
def load_feedback(variant="closed_loop", reader=None):
    """Load the user-item ratings of one of the Yahoo Music datasets.

    Parameters
    ----------
    variant: str, optional, default: 'closed_loop'
        Which Yahoo Music dataset to load, one of ['closed_loop', 'open_loop'].
        The value is upper-cased before it is looked up in ``YAHOO_DATASETS``.

    reader: `obj:cornac.data.Reader`, optional, default: None
        Reader object used to read the data. A default ``Reader()`` is
        created when none is supplied.

    Returns
    -------
    data: array-like
        Whatever ``reader.read`` returns for the 'UIR' format.

    Raises
    ------
    ValueError
        If ``variant`` does not match any entry of ``YAHOO_DATASETS``.
    """
    dataset = YAHOO_DATASETS.get(variant.upper(), None)

    if dataset is not None:
        # Resolve the local file first, then fall back to a default reader.
        local_path = cache(url=dataset.url,
                           unzip=dataset.unzip,
                           relative_path=dataset.path)
        if reader is None:
            reader = Reader()
        return reader.read(local_path, 'UIR',
                           sep=dataset.sep,
                           skip_lines=dataset.skip)

    raise ValueError("variant must be one of {}.".format(
        YAHOO_DATASETS.keys()))
Exemple #3
0
def read_data(path):
    """Read the data file at ``path`` with a default-configured ``Reader``.

    Parameters
    ----------
    path: str
        Location of the file to read.

    Returns
    -------
    data
        Whatever ``Reader.read`` returns for the file in 'UIR' format.
    """
    # The format is intentionally hard-coded to UIR; validate_format still
    # guards it against the list of formats this function is meant to accept.
    # NOTE(review): assumes validate_format returns the validated format
    # string, as the original assignment to `fmt` suggests -- confirm.
    fmt = validate_format("UIR", ["UIR", "UIRT"])

    # No caller-supplied reader exists here, so build one directly instead of
    # going through a None check that can never take the other branch.
    return Reader().read(path, fmt=fmt)
Exemple #4
0
    def test_filter(self):
        """Check how many rows survive each ``Reader`` filtering option."""

        def kept(**options):
            # Build a fresh Reader for the scenario and count the rows it returns.
            return len(Reader(**options).read(self.data_file))

        # Threshold filter.
        self.assertEqual(kept(bin_threshold=4.0), 8)

        # Minimum frequency filters.
        self.assertEqual(kept(min_user_freq=2), 0)
        self.assertEqual(kept(min_item_freq=2), 0)

        # Explicit user / item whitelists.
        self.assertEqual(kept(user_set=['76'], item_set=['93']), 1)
        self.assertEqual(kept(user_set=['76', '768']), 2)
        self.assertEqual(kept(item_set=['93', '257', '795']), 3)
Exemple #5
0
    def test_filter(self):
        """Check row counts (and binarised values) for each ``Reader`` filter."""
        # With bin_threshold set, every returned row must carry the value 1.
        binarized = Reader(bin_threshold=4.0).read(self.data_file)
        self.assertEqual(len(binarized), 8)
        self.assertListEqual([row[2] for row in binarized],
                             [1] * len(binarized))

        # Remaining scenarios only check the row count: (Reader options, expected).
        scenarios = [
            ({'min_user_freq': 2}, 0),
            ({'min_item_freq': 2}, 0),
            ({'user_set': ['76'], 'item_set': ['93']}, 1),
            ({'user_set': ['76', '768']}, 2),
            ({'item_set': ['93', '257', '795']}, 3),
        ]
        for options, expected in scenarios:
            rows = Reader(**options).read(self.data_file)
            self.assertEqual(len(rows), expected)
Exemple #6
0
class TestReader(unittest.TestCase):
    """Unit tests for ``Reader.read`` across formats, plus ``read_text``.

    Most tests read ``./tests/data.txt``; the review test reads
    ``./tests/review.txt``.
    """

    def setUp(self):
        self.data_file = './tests/data.txt'
        self.reader = Reader()

    def test_raise(self):
        """An unsupported ``fmt`` value must raise ``ValueError``."""
        # assertRaises fails the test when nothing is raised; a bare
        # try/except around the call would pass silently in that case.
        with self.assertRaises(ValueError):
            self.reader.read(self.data_file, fmt='bla bla')

    def test_read_ui(self):
        """Check row count and sample values when reading with ``fmt='UI'``."""
        triplets = self.reader.read(self.data_file, fmt='UI')
        self.assertEqual(len(triplets), 30)
        self.assertEqual(triplets[0][1], '93')
        self.assertEqual(triplets[1][2], 1.0)

        # Same file, same format, but with id_inline enabled.
        triplets = self.reader.read(self.data_file, fmt='UI', id_inline=True)
        self.assertEqual(len(triplets), 40)

    def test_read_uir(self):
        """Check row count and sample values when reading with the default format."""
        triplet_data = self.reader.read(self.data_file)

        self.assertEqual(len(triplet_data), 10)
        self.assertEqual(triplet_data[4][2], 3)
        self.assertEqual(triplet_data[6][1], '478')
        self.assertEqual(triplet_data[8][0], '543')

    def test_read_uirt(self):
        """Check all four fields of a row when reading with ``fmt='UIRT'``."""
        data = self.reader.read(self.data_file, fmt='UIRT')

        self.assertEqual(len(data), 10)
        self.assertEqual(data[4][3], 891656347)
        self.assertEqual(data[4][2], 3)
        self.assertEqual(data[4][1], '705')
        self.assertEqual(data[4][0], '329')
        self.assertEqual(data[9][3], 879451804)

    def test_read_tup(self):
        """Check that ``fmt='UITup'`` returns the trailing fields as string tuples."""
        tup_data = self.reader.read(self.data_file, fmt='UITup')
        self.assertEqual(len(tup_data), 10)
        self.assertEqual(tup_data[4][2], [('3', ), ('891656347', )])
        self.assertEqual(tup_data[6][1], '478')
        self.assertEqual(tup_data[8][0], '543')

    def test_read_review(self):
        """Check row count and sample values when reading with ``fmt='UIReview'``."""
        review_data = self.reader.read('./tests/review.txt', fmt='UIReview')
        self.assertEqual(len(review_data), 5)
        self.assertEqual(review_data[0][2], 'Sample text 1')
        self.assertEqual(review_data[1][1], '257')
        self.assertEqual(review_data[4][0], '329')

    def test_filter(self):
        """Check row counts (and binarised values) for each ``Reader`` filter."""
        reader = Reader(bin_threshold=4.0)
        data = reader.read(self.data_file)
        self.assertEqual(len(data), 8)
        # With bin_threshold set, every returned row must carry the value 1.
        self.assertListEqual([x[2] for x in data], [1] * len(data))

        reader = Reader(min_user_freq=2)
        self.assertEqual(len(reader.read(self.data_file)), 0)

        reader = Reader(min_item_freq=2)
        self.assertEqual(len(reader.read(self.data_file)), 0)

        reader = Reader(user_set=['76'], item_set=['93'])
        self.assertEqual(len(reader.read(self.data_file)), 1)

        reader = Reader(user_set=['76', '768'])
        self.assertEqual(len(reader.read(self.data_file)), 2)

        reader = Reader(item_set=['93', '257', '795'])
        self.assertEqual(len(reader.read(self.data_file)), 3)

    def test_read_text(self):
        """Check ``read_text`` with no separator and with a tab separator."""
        self.assertEqual(len(read_text(self.data_file, sep=None)), 10)
        self.assertEqual(read_text(self.data_file, sep='\t')[1][0], '76')
Exemple #7
0
    print("# bpr samples:", args.bpr_samples)
    print("# element samples:", args.element_samples)
    print("lambda reg =", args.lambda_reg)
    print("lambda bpr =", args.lambda_bpr)
    print("learning rate =", args.learning_rate)
    print("Seed value =", args.seed)
    print("VERBOSE =", args.verbose)
    return args


# Parse the command-line options once; `args` drives everything below.
args = parse_arguments()

# Make sure the output directory exists (no error if it is already there).
os.makedirs(args.out, exist_ok=True)

# Read the comma-separated train and test files from the input directory.
reader = Reader()
train_data = reader.read(os.path.join(args.indir, "train.txt"), sep=",")
test_data = reader.read(os.path.join(args.indir, "test.txt"), sep=",")
# The sentiment file is read in 'UITup' format with its own tuple separator.
# NOTE(review): presumably ':' splits the fields inside each tuple -- confirm
# against the Reader documentation.
sentiment = reader.read(os.path.join(args.indir, "sentiment.txt"),
                        fmt="UITup",
                        sep=",",
                        tup_sep=":")
# Wrap the sentiment data so it can be handed to the evaluation method.
md = SentimentModality(data=sentiment)
# Build the evaluation method from the pre-made train/test splits.
eval_method = BaseMethod.from_splits(
    train_data=train_data,
    test_data=test_data,
    sentiment=md,
    exclude_unknowns=True,
    verbose=args.verbose,
)

mter = cornac.models.MTER(
Exemple #8
0
# -*- coding: utf-8 -*-
"""
Example to train and evaluate a model with given data

@author: Quoc-Tuan Truong <*****@*****.**>
"""

from cornac.data import Reader
from cornac.eval_methods import BaseMethod
from cornac.models import MF
from cornac.metrics import MAE, RMSE
from cornac.utils import cache

# Fetch the MovieLens 100K provided training and test splits and parse each
# file with a default Reader (train first, then test).
reader = Reader()

train_path = cache(
    url='http://files.grouplens.org/datasets/movielens/ml-100k/u1.base')
train_data = reader.read(train_path)

test_path = cache(
    url='http://files.grouplens.org/datasets/movielens/ml-100k/u1.test')
test_data = reader.read(test_path)

# Evaluation method built from the given splits.
eval_method = BaseMethod.from_splits(
    train_data=train_data,
    test_data=test_data,
    exclude_unknowns=False,
    verbose=True,
)

# Matrix factorization model configuration.
mf = MF(
    k=10,
    max_iter=25,
    learning_rate=0.01,
    lambda_reg=0.02,
    use_bias=True,
    early_stop=True,
    verbose=True,
)
Exemple #9
0
from cornac.data import Reader, GraphModality
from cornac.datasets import epinions
from cornac.eval_methods import RatioSplit
import numpy as np
# Name of the dataset to run on; must be a key of DATASET_FILES below.
dataset = "yelp_ON"

# (ratings file, trust file) for every supported dataset.
DATASET_FILES = {
    "ciao": (
        "/cluster/home/it_stu110/proj/Rec_baseline/data_process/ciao/ciao_simple.txt",
        "/cluster/home/it_stu110/proj/Rec_baseline/data_process/ciao/ciao_trust.txt",
    ),
    "yelp_ON": (
        "/cluster/home/it_stu110/proj/Rec_baseline/data_process/yelp/yelp_ON_simple.txt",
        "/cluster/home/it_stu110/proj/Rec_baseline/data_process/yelp/yelp_ON_trust.txt",
    ),
}

# Fail early with a clear message instead of hitting a NameError on
# data_path / trust_path further down when the dataset name is unknown.
if dataset not in DATASET_FILES:
    raise ValueError("Unknown dataset {!r}; expected one of {}.".format(
        dataset, sorted(DATASET_FILES)))
data_path, trust_path = DATASET_FILES[dataset]

# SBPR integrates user social network into Bayesian Personalized Ranking.
# The necessary data can be loaded as follows

# Both files are space-separated.
reader = Reader()
trust = reader.read(trust_path, sep=' ')
feedback = reader.read(data_path, sep=' ')

# Instantiate a GraphModality, it makes it convenient to work with graph (network) auxiliary information
# For more details, please refer to the tutorial on how to work with auxiliary data
user_graph_modality = GraphModality(data=trust)

# Define an evaluation method to split feedback into train and test sets
ratio_split = RatioSplit(
    data=feedback,
    test_size=0.1,
    rating_threshold=0.5,
    exclude_unknowns=True,
    verbose=True,
    user_graph=user_graph_modality,
)