Beispiel #1
0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Example for HFT with the MovieLens 1M dataset."""

import cornac
from cornac.data import Reader
from cornac.datasets import movielens
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer

# Load the movie plots first so the ratings can be restricted to the
# items that actually have a plot.
plots, movie_ids = movielens.load_plot()
ml_1m = movielens.load_1m(reader=Reader(item_set=movie_ids))

# Build the item text modality from the plots.
item_text_modality = TextModality(
    corpus=plots,
    ids=movie_ids,
    tokenizer=BaseTokenizer(sep="\t", stop_words="english"),
    max_vocab=5000,
    max_doc_freq=0.5,
)

ratio_split = RatioSplit(data=ml_1m,
                         test_size=0.2,
                         exclude_unknowns=True,
                         item_text=item_text_modality,
                         verbose=True,
Beispiel #2
0
 def test_load_plot(self):
     # Data download tests are slow, so reseed from the clock and let only
     # about one run in five (draws above 0.8) actually exercise the loader.
     random.seed(time.time())
     if random.random() <= 0.8:
         return
     # Only the ids are checked; the plots themselves are discarded.
     _, plot_ids = movielens.load_plot()
     self.assertEqual(len(plot_ids), 10076)