Beispiel #1
0
# -*- coding: utf-8 -*-
"""
Example for Collaborative Deep Ranking

@author: Tran Thanh Binh
"""

import cornac
from cornac.data import Reader
from cornac.datasets import citeulike
from cornac.eval_methods import RatioSplit
from cornac.data import TextModule
from cornac.data.text import BaseTokenizer

# Fetch the CiteULike text corpus along with the item ids returned with it.
docs, item_ids = citeulike.load_text()

# Load the CiteULike data through a Reader that is handed those item ids
# (presumably so that only items with text are kept -- confirm against Reader).
citeulike_reader = Reader(item_set=item_ids)
data = citeulike.load_data(reader=citeulike_reader)

# build text module
# The tokenizer is constructed with a tab character; the remaining keyword
# arguments are passed through to TextModule unchanged.
tab_tokenizer = BaseTokenizer('\t')
item_text_module = TextModule(
    corpus=docs,
    ids=item_ids,
    tokenizer=tab_tokenizer,
    max_vocab=8000,
    max_doc_freq=0.5,
    stop_words='english',
)

ratio_split = RatioSplit(data=data,
                         test_size=0.2,
                         exclude_unknowns=True,
                         item_text=item_text_module,
                         verbose=True,
                         seed=123,
Beispiel #2
0
 def test_load_data(self):
     """Check that ``citeulike.load_data()`` returns 210537 entries.

     The data download test only runs on roughly 20% of invocations to
     speed up frequent testing. The other invocations are reported as
     skipped rather than silently passing without checking anything.
     """
     # Draw from a private generator instead of reseeding the module-level
     # RNG: random.Random() seeds itself from OS entropy (or the clock), so
     # the gate stays random without clobbering seeds other tests rely on.
     sampler = random.Random()
     if sampler.random() <= 0.8:
         self.skipTest("data download tests only run 20% of the time")
     data = citeulike.load_data()
     self.assertEqual(len(data), 210537)