# -*- coding: utf-8 -*- """ Example for Collaborative Deep Ranking @author: Tran Thanh Binh """ import cornac from cornac.data import Reader from cornac.datasets import citeulike from cornac.eval_methods import RatioSplit from cornac.data import TextModule from cornac.data.text import BaseTokenizer docs, item_ids = citeulike.load_text() data = citeulike.load_data(reader=Reader(item_set=item_ids)) # build text module item_text_module = TextModule(corpus=docs, ids=item_ids, tokenizer=BaseTokenizer('\t'), max_vocab=8000, max_doc_freq=0.5, stop_words='english') ratio_split = RatioSplit(data=data, test_size=0.2, exclude_unknowns=True, item_text=item_text_module, verbose=True, seed=123,
def test_load_data(self):
    """Verify the size of the data returned by ``citeulike.load_data()``.

    The check is gated behind a random draw so that it only executes on a
    fraction of test runs; on the remaining runs the method returns without
    asserting anything. When it does run, it asserts that the loaded data
    has exactly 210537 entries.
    """
    # Only run data download tests 20% of the time to speed up frequent
    # testing. Seeding from the wall clock makes the draw differ between
    # runs even if the generator was seeded deterministically earlier.
    random.seed(time.time())
    if random.random() > 0.8:
        data = citeulike.load_data()
        self.assertEqual(len(data), 210537)