def split_names_corpus(self, document_class=Name):
    """Split the names corpus into training, dev, and test sets.

    The corpus is loaded with ``document_class``, its size is checked
    against the expected total, and it is shuffled in place with a fixed
    seed before being sliced.

    Returns a 3-tuple ``(train, dev, test)``: the first 5000 shuffled
    items, the next 1000, and everything after index 6000.
    """
    names = NamesCorpus(document_class=document_class)
    self.assertEqual(len(names), 5001 + 2943)  # see names/README

    # Seed with the string itself instead of hash("names"): on Python 3,
    # str hashes are salted per process, so hash("names") would produce a
    # different split on every run. Given a str, the random module derives
    # the seed deterministically (and on Python 2 it hashes the string
    # itself, which matches the previous behaviour).
    # NOTE(review): assumes `seed` is `random.seed` -- confirm the import.
    seed("names")
    shuffle(names)
    return (names[:5000], names[5000:6000], names[6000:])
def split_names_corpus(self, document_class=Name):
    """Return a reproducible (train, dev, test) split of the names corpus.

    Builds a ``NamesCorpus`` using ``document_class``, asserts that it has
    the expected number of items, shuffles it in place with a fixed seed,
    and slices it into three parts: items ``[:5000]`` for training,
    ``[5000:6000]`` for development, and ``[6000:]`` for testing.
    """
    names = NamesCorpus(document_class=document_class)
    self.assertEqual(len(names), 5001 + 2943)  # see names/README

    # Pass the string directly rather than hash("names"): Python 3 salts
    # str hashes per process, which made the shuffle order (and so the
    # split) change between runs. A str seed is converted deterministically
    # by the random module; on Python 2 it is hashed internally, which is
    # what the old call did explicitly.
    # NOTE(review): assumes `seed` is `random.seed` -- confirm the import.
    seed("names")
    shuffle(names)
    return (names[:5000], names[5000:6000], names[6000:])