def test_syllabi(mock_osp):
    """
    Corpus#syllabi() should yield one Syllabus instance per file, walking
    the segments and their files in order, and then be exhausted.
    """
    segment_total = 10
    files_per_segment = 10

    # Fill every segment with files named "<segment>-<index>".
    for segment in segment_range(segment_total):
        for index in range(files_per_segment):
            file_name = '-'.join((segment, str(index)))
            mock_osp.add_file(segment=segment, name=file_name)

    corpus = Corpus(mock_osp.path)
    generated = corpus.syllabi()

    for segment in segment_range(segment_total):
        for index in range(files_per_segment):

            # Each item must be a Syllabus...
            current = next(generated)
            assert isinstance(current, Syllabus)

            # ...pointing at the next expected file path.
            file_name = '-'.join((segment, str(index)))
            expected_path = os.path.join(corpus.path, segment, file_name)
            assert current.path == expected_path

    # Nothing is left once every file has been visited.
    assert next(generated, False) == False
def test_file_paths(mock_osp):
    """
    Corpus#file_paths() should yield the path of every file, segment by
    segment, and then be exhausted.
    """
    # Seed 10 segments with 10 files apiece, named "<segment>-<n>".
    for seg in segment_range(10):
        for n in range(10):
            mock_osp.add_file(segment=seg, name='-'.join([seg, str(n)]))

    corpus = Corpus(mock_osp.path)
    path_iter = corpus.file_paths()

    # Each generated path must match the next expected file, in order.
    for seg in segment_range(10):
        for n in range(10):
            file_name = '-'.join([seg, str(n)])
            expected = os.path.join(corpus.path, seg, file_name)
            assert next(path_iter) == expected

    # The generator has nothing further to offer.
    assert next(path_iter, False) == False
def test_syllabi(mock_osp):
    """
    Every file added to the mock corpus should come back from
    Corpus#syllabi() as a Syllabus whose path matches the file.
    """
    def name_for(segment, number):
        # Files are named "<segment>-<number>".
        return segment + '-' + str(number)

    # 10 segments x 10 files.
    for segment in segment_range(10):
        for number in range(10):
            mock_osp.add_file(segment=segment, name=name_for(segment, number))

    corpus = Corpus(mock_osp.path)
    stream = corpus.syllabi()

    # Consume the generator in the same segment / file order.
    for segment in segment_range(10):
        for number in range(10):
            item = next(stream)
            assert isinstance(item, Syllabus)

            wanted = os.path.join(
                corpus.path,
                segment,
                name_for(segment, number),
            )
            assert item.path == wanted

    # After the last file the generator falls back to the default.
    leftover = next(stream, False)
    assert leftover == False
def file_count():
    """
    Echo the file count of the corpus built by Corpus.from_env().
    """
    total = Corpus.from_env().file_count
    click.echo(total)
def test_missing_segments(mock_osp):
    """
    Segments that are requested but were never added should be skipped
    by the generator without error.
    """
    # Only segments 0-10 are added to the mock...
    mock_osp.add_segments(s1=0, s2=10)

    # ...while the corpus is asked for the wider range 0-20.
    corpus = Corpus(mock_osp.path, s1=0, s2=20)
    generated = corpus.segments()

    # Each of the first 10 yielded segments is checked against its index.
    index = 0
    while index < 10:
        assert_segment(corpus, next(generated), index)
        index += 1

    # Nothing is generated for the segments that were never added.
    assert next(generated, False) == False
def insert_documents(cls):
    """
    Insert a document row for each syllabus in the corpus.

    Insertion is best-effort: when creating a row raises an ordinary
    exception, that syllabus is skipped and the loop carries on with the
    remaining ones.
    """
    for syllabus in Corpus.from_env().syllabi_bar():
        try:
            cls.create(path=syllabus.relative_path)
        except Exception:
            # Deliberately best-effort (presumably to skip rows that already
            # exist -- confirm against the model's constraints). Catching
            # Exception rather than using a bare `except:` lets
            # KeyboardInterrupt / SystemExit propagate, so a long-running
            # insert can still be interrupted.
            continue
def test_bounded_partition(mock_osp):
    """
    When segment boundaries are given, only the segments inside the
    requested range should be generated.
    """
    lower, upper = 0, 10

    # Add segments 0-10 to the mock, then request exactly that range.
    mock_osp.add_segments(s1=lower, s2=upper)
    corpus = Corpus(mock_osp.path, s1=lower, s2=upper)
    generated = corpus.segments()

    # 10 segments are expected, each checked against its index.
    for position in range(10):
        segment = next(generated)
        assert_segment(corpus, segment, position)

    # And the generator is then exhausted.
    assert next(generated, False) == False
def test_full_partition(mock_osp):
    """
    When no segment boundaries are passed, Corpus#segments() should
    generate an instance for every segment.
    """
    # Add all segments (add_segments() is called with no bounds).
    mock_osp.add_segments()

    # The corpus is likewise created without s1 / s2 bounds.
    corpus = Corpus(mock_osp.path)
    generated = corpus.segments()

    # NOTE(review): the original description of this test mentions 4096
    # segments, but 4095 are checked here before exhaustion is asserted --
    # confirm which bound is intended.
    expected_count = 4095
    for position in range(expected_count):
        assert_segment(corpus, next(generated), position)

    # No further segments should be generated.
    assert next(generated, False) == False
def test_file_count(mock_osp):
    """
    Corpus#file_count should equal the total number of files across all
    segments.
    """
    # 10 segments x 10 files = 100 files.
    for segment in segment_range(10):
        for number in range(10):
            mock_osp.add_file(segment=segment, name=str(number))

    expected_total = 100
    assert Corpus(mock_osp.path).file_count == expected_total
def test_file_paths(mock_osp):
    """
    The paths generated by Corpus#file_paths() should cover the files of
    every segment, in segment / file order.
    """
    def name_for(segment, number):
        # Files are named "<segment>-<number>".
        return segment + '-' + str(number)

    # 10 segments, 10 files in each.
    for segment in segment_range(10):
        for number in range(10):
            mock_osp.add_file(segment=segment, name=name_for(segment, number))

    corpus = Corpus(mock_osp.path)
    remaining = corpus.file_paths()

    # Walk the same segments / files and compare against each path.
    for segment in segment_range(10):
        for number in range(10):
            wanted = os.path.join(
                corpus.path,
                segment,
                name_for(segment, number),
            )
            produced = next(remaining)
            assert produced == wanted

    # Once all files are consumed, only the default comes back.
    leftover = next(remaining, False)
    assert leftover == False