def test_big_pages(self):
    ''' Check the token list size of a volume with *many* tokens per page.

    Opens the bundled fixture under tests/data, takes the first volume the
    reader yields, and verifies the first dimension of its token list.
    '''
    fixture = os.path.join('tests', 'data', 'aeu.ark+=13960=t1rf63t52.json.bz2')
    tokens = FeatureReader(fixture).first().tokenlist()
    # 56397 is the expected size of the first axis for this specific fixture.
    assert tokens.shape[0] == 56397
def test_first(self, paths, titles):
    ''' Check that FeatureReader.first() yields the expected first Volume.

    paths:  input handed straight to FeatureReader.
    titles: indexable collection whose first item is the title expected
            for the first volume (presumably a fixture parallel to
            `paths` -- confirm against the fixture definitions).
    '''
    feature_reader = FeatureReader(paths)
    vol = feature_reader.first()
    # isinstance rather than `type(...) ==`: the idiomatic type check, and it
    # keeps the test valid if first() ever returns a Volume subclass.
    assert isinstance(vol, htrc_features.feature_reader.Volume)
    assert vol.title == titles[0]
def test_first(self, paths):
    ''' Check that FeatureReader.first() yields the expected first Volume.

    paths: input handed straight to FeatureReader.

    The expected title is read from self.TITLES[0] (presumably a
    class-level list parallel to `paths` -- confirm against the test class).
    '''
    feature_reader = FeatureReader(paths)
    vol = feature_reader.first()
    # isinstance rather than `type(...) ==`: the idiomatic type check, and it
    # keeps the test valid if first() ever returns a Volume subclass.
    assert isinstance(vol, htrc_features.feature_reader.Volume)
    assert vol.title == self.TITLES[0]