class TestReader(unittest.TestCase):
    """Unit tests for the Reader parser: format handling, filtering, raw text."""

    def setUp(self):
        # Shared fixture: small ratings file plus a default (unfiltered) reader.
        self.data_file = './tests/data.txt'
        self.reader = Reader()

    def test_raise(self):
        # BUGFIX: the original `try: ... except ValueError: assert True`
        # passed silently even when no exception was raised at all.
        # assertRaises fails the test unless ValueError is actually thrown.
        with self.assertRaises(ValueError):
            self.reader.read(self.data_file, fmt='bla bla')

    def test_read_ui(self):
        # 'UI' format: user followed by item ids; implicit rating of 1.0.
        triplets = self.reader.read(self.data_file, fmt='UI')
        self.assertEqual(len(triplets), 30)
        self.assertEqual(triplets[0][1], '93')
        self.assertEqual(triplets[1][2], 1.0)
        # id_inline=True: first token of each line is the user id.
        triplets = self.reader.read(self.data_file, fmt='UI', id_inline=True)
        self.assertEqual(len(triplets), 40)

    def test_read_uir(self):
        # Default 'UIR' format: (user, item, rating) triplets.
        triplet_data = self.reader.read(self.data_file)
        self.assertEqual(len(triplet_data), 10)
        self.assertEqual(triplet_data[4][2], 3)
        self.assertEqual(triplet_data[6][1], '478')
        self.assertEqual(triplet_data[8][0], '543')

    def test_filter(self):
        # bin_threshold keeps ratings >= 4.0 only.
        reader = Reader(bin_threshold=4.0)
        self.assertEqual(len(reader.read(self.data_file)), 8)
        # Every user/item appears once in the fixture, so freq >= 2 drops all.
        reader = Reader(min_user_freq=2)
        self.assertEqual(len(reader.read(self.data_file)), 0)
        reader = Reader(min_item_freq=2)
        self.assertEqual(len(reader.read(self.data_file)), 0)
        # Explicit id whitelists.
        reader = Reader(user_set=['76'], item_set=['93'])
        self.assertEqual(len(reader.read(self.data_file)), 1)
        reader = Reader(user_set=['76', '768'])
        self.assertEqual(len(reader.read(self.data_file)), 2)
        reader = Reader(item_set=['93', '257', '795'])
        self.assertEqual(len(reader.read(self.data_file)), 3)

    def test_read_text(self):
        # read_text: sep=None yields whole lines; sep='\t' splits columns.
        self.assertEqual(len(read_text(self.data_file, sep=None)), 10)
        self.assertEqual(read_text(self.data_file, sep='\t')[1][0], '76')
def load_feedback(variant="closed_loop", reader=None):
    """Load the user-item ratings of one of the Yahoo Music datasets

    Parameters
    ----------
    variant: str, optional, default: 'closed_loop'
        Specifies which Yahoo Music dataset to load,
        one of ['closed_loop', 'open_loop'].

    reader: `obj:cornac.data.Reader`, optional, default: None
        Reader object used to read the data.

    Returns
    -------
    data: array-like
        Data in the form of a list of tuples depending on the given data format.
    """
    # Variant names are stored upper-cased in the registry.
    dataset = YAHOO_DATASETS.get(variant.upper())
    if dataset is None:
        raise ValueError("variant must be one of {}.".format(
            YAHOO_DATASETS.keys()))

    # Download (and unzip if needed) into the local cache, then parse.
    fpath = cache(url=dataset.url, unzip=dataset.unzip,
                  relative_path=dataset.path)
    if reader is None:
        reader = Reader()
    return reader.read(fpath, 'UIR', sep=dataset.sep, skip_lines=dataset.skip)
def read_data(path):
    """Read (user, item, rating) triplets from a data file in 'UIR' format.

    Parameters
    ----------
    path: str
        Path of the data file to parse.

    Returns
    -------
    data: list
        List of `(user, item, rating)` tuples as produced by `Reader.read`.
    """
    # Cleanup: the original assigned validate_format("UIR", ...) to an unused
    # local (always valid, so a no-op) and set `reader = None` only to
    # immediately replace it with `Reader()` — both were dead code.
    return Reader().read(path)
def test_filter(self):
    # Table of (Reader kwargs, expected surviving row count); assertions
    # run in the same order as the original hand-written sequence.
    filter_cases = [
        (dict(bin_threshold=4.0), 8),
        (dict(min_user_freq=2), 0),
        (dict(min_item_freq=2), 0),
        (dict(user_set=['76'], item_set=['93']), 1),
        (dict(user_set=['76', '768']), 2),
        (dict(item_set=['93', '257', '795']), 3),
    ]
    for kwargs, expected in filter_cases:
        filtered = Reader(**kwargs).read(self.data_file)
        self.assertEqual(len(filtered), expected)
def test_filter(self):
    # Binarization: only ratings >= threshold survive, all mapped to 1.
    binarized = Reader(bin_threshold=4.0).read(self.data_file)
    self.assertEqual(len(binarized), 8)
    self.assertListEqual([row[2] for row in binarized], [1] * len(binarized))

    # Frequency and id-set filters, checked in the original order.
    for kwargs, expected in (
        (dict(min_user_freq=2), 0),
        (dict(min_item_freq=2), 0),
        (dict(user_set=['76'], item_set=['93']), 1),
        (dict(user_set=['76', '768']), 2),
        (dict(item_set=['93', '257', '795']), 3),
    ):
        self.assertEqual(len(Reader(**kwargs).read(self.data_file)), expected)
class TestReader(unittest.TestCase):
    """Unit tests for the Reader parser across all supported data formats."""

    def setUp(self):
        # Shared fixture: small ratings file plus a default (unfiltered) reader.
        self.data_file = './tests/data.txt'
        self.reader = Reader()

    def test_raise(self):
        # BUGFIX: the original `try: ... except ValueError: assert True`
        # passed silently even when no exception was raised at all.
        # assertRaises fails the test unless ValueError is actually thrown.
        with self.assertRaises(ValueError):
            self.reader.read(self.data_file, fmt='bla bla')

    def test_read_ui(self):
        # 'UI' format: user followed by item ids; implicit rating of 1.0.
        triplets = self.reader.read(self.data_file, fmt='UI')
        self.assertEqual(len(triplets), 30)
        self.assertEqual(triplets[0][1], '93')
        self.assertEqual(triplets[1][2], 1.0)
        # id_inline=True: first token of each line is the user id.
        triplets = self.reader.read(self.data_file, fmt='UI', id_inline=True)
        self.assertEqual(len(triplets), 40)

    def test_read_uir(self):
        # Default 'UIR' format: (user, item, rating) triplets.
        triplet_data = self.reader.read(self.data_file)
        self.assertEqual(len(triplet_data), 10)
        self.assertEqual(triplet_data[4][2], 3)
        self.assertEqual(triplet_data[6][1], '478')
        self.assertEqual(triplet_data[8][0], '543')

    def test_read_uirt(self):
        # 'UIRT' format: (user, item, rating, timestamp) quadruplets.
        data = self.reader.read(self.data_file, fmt='UIRT')
        self.assertEqual(len(data), 10)
        self.assertEqual(data[4][3], 891656347)
        self.assertEqual(data[4][2], 3)
        self.assertEqual(data[4][1], '705')
        self.assertEqual(data[4][0], '329')
        self.assertEqual(data[9][3], 879451804)

    def test_read_tup(self):
        # 'UITup': trailing columns are kept as per-column tuples.
        tup_data = self.reader.read(self.data_file, fmt='UITup')
        self.assertEqual(len(tup_data), 10)
        self.assertEqual(tup_data[4][2], [('3', ), ('891656347', )])
        self.assertEqual(tup_data[6][1], '478')
        self.assertEqual(tup_data[8][0], '543')

    def test_read_review(self):
        # 'UIReview': third column is free-form review text.
        review_data = self.reader.read('./tests/review.txt', fmt='UIReview')
        self.assertEqual(len(review_data), 5)
        self.assertEqual(review_data[0][2], 'Sample text 1')
        self.assertEqual(review_data[1][1], '257')
        self.assertEqual(review_data[4][0], '329')

    def test_filter(self):
        # bin_threshold keeps ratings >= 4.0 and binarizes them to 1.
        reader = Reader(bin_threshold=4.0)
        data = reader.read(self.data_file)
        self.assertEqual(len(data), 8)
        self.assertListEqual([x[2] for x in data], [1] * len(data))
        # Every user/item appears once in the fixture, so freq >= 2 drops all.
        reader = Reader(min_user_freq=2)
        self.assertEqual(len(reader.read(self.data_file)), 0)
        reader = Reader(min_item_freq=2)
        self.assertEqual(len(reader.read(self.data_file)), 0)
        # Explicit id whitelists.
        reader = Reader(user_set=['76'], item_set=['93'])
        self.assertEqual(len(reader.read(self.data_file)), 1)
        reader = Reader(user_set=['76', '768'])
        self.assertEqual(len(reader.read(self.data_file)), 2)
        reader = Reader(item_set=['93', '257', '795'])
        self.assertEqual(len(reader.read(self.data_file)), 3)

    def test_read_text(self):
        # read_text: sep=None yields whole lines; sep='\t' splits columns.
        self.assertEqual(len(read_text(self.data_file, sep=None)), 10)
        self.assertEqual(read_text(self.data_file, sep='\t')[1][0], '76')
print("# bpr samples:", args.bpr_samples) print("# element samples:", args.element_samples) print("lambda reg =", args.lambda_reg) print("lambda bpr =", args.lambda_bpr) print("learning rate =", args.learning_rate) print("Seed value =", args.seed) print("VERBOSE =", args.verbose) return args args = parse_arguments() os.makedirs(args.out, exist_ok=True) reader = Reader() train_data = reader.read(os.path.join(args.indir, "train.txt"), sep=",") test_data = reader.read(os.path.join(args.indir, "test.txt"), sep=",") sentiment = reader.read(os.path.join(args.indir, "sentiment.txt"), fmt="UITup", sep=",", tup_sep=":") md = SentimentModality(data=sentiment) eval_method = BaseMethod.from_splits( train_data=train_data, test_data=test_data, sentiment=md, exclude_unknowns=True, verbose=args.verbose, ) mter = cornac.models.MTER(
# -*- coding: utf-8 -*-
"""
Example to train and evaluate a model with given data

@author: Quoc-Tuan Truong <*****@*****.**>
"""

from cornac.data import Reader
from cornac.eval_methods import BaseMethod
from cornac.models import MF
from cornac.metrics import MAE, RMSE
from cornac.utils import cache

# MovieLens 100K pre-made train/test split (u1).
TRAIN_URL = 'http://files.grouplens.org/datasets/movielens/ml-100k/u1.base'
TEST_URL = 'http://files.grouplens.org/datasets/movielens/ml-100k/u1.test'

# Download (cached locally) and parse both splits as (user, item, rating).
reader = Reader()
train_data = reader.read(cache(url=TRAIN_URL))
test_data = reader.read(cache(url=TEST_URL))

# Evaluate on the provided split as-is, keeping unknown users/items.
eval_method = BaseMethod.from_splits(
    train_data=train_data,
    test_data=test_data,
    exclude_unknowns=False,
    verbose=True,
)

# Matrix Factorization with bias terms and early stopping.
mf = MF(
    k=10,
    max_iter=25,
    learning_rate=0.01,
    lambda_reg=0.02,
    use_bias=True,
    early_stop=True,
    verbose=True,
)
from cornac.data import Reader, GraphModality
from cornac.datasets import epinions
from cornac.eval_methods import RatioSplit
import numpy as np

# Select which dataset's rating/trust files to load.
dataset = "yelp_ON"
if dataset == "ciao":
    data_path = "/cluster/home/it_stu110/proj/Rec_baseline/data_process/ciao/ciao_simple.txt"
    trust_path = "/cluster/home/it_stu110/proj/Rec_baseline/data_process/ciao/ciao_trust.txt"
elif dataset == "yelp_ON":
    data_path = "/cluster/home/it_stu110/proj/Rec_baseline/data_process/yelp/yelp_ON_simple.txt"
    trust_path = '/cluster/home/it_stu110/proj/Rec_baseline/data_process/yelp/yelp_ON_trust.txt'

# SBPR integrates user social network into Bayesian Personalized Ranking,
# so both the trust edges and the rating feedback are needed.
reader = Reader()
trust = reader.read(trust_path, sep=' ')
feedback = reader.read(data_path, sep=' ')

# GraphModality wraps the trust edges as graph (network) auxiliary
# information; see the tutorial on working with auxiliary data.
user_graph_modality = GraphModality(data=trust)

# 90/10 train/test split; unknown users/items excluded from evaluation.
ratio_split = RatioSplit(
    data=feedback,
    test_size=0.1,
    rating_threshold=0.5,
    exclude_unknowns=True,
    verbose=True,
    user_graph=user_graph_modality,
)