def test_ignore_non_xml_files(mock_jstor):
    """
    JSTOR_Corpus#paths() should skip files that are not XML manifests.
    """

    # Add three articles, keeping whatever add_article() hands back; the
    # final assertions expect these to be exactly the corpus paths.
    manifests = [mock_jstor.add_article() for _ in range(3)]

    # Decoy files are written next to the first manifest.
    target_dir = os.path.dirname(manifests[0])

    for extension in ['js', 'zip', 'txt']:
        decoy = os.path.join(target_dir, 'test.' + extension)
        with open(decoy, 'w') as handle:
            print('content', file=handle)

    found = list(JSTOR_Corpus(mock_jstor.path).paths())

    # Only the three manifests may come back, and no extras.
    assert set(found) == set(manifests)
    assert len(found) == 3
def test_generate_paths(mock_jstor):
    """
    JSTOR_Corpus#paths() should yield one path per added article.
    """

    # Three articles; the values returned by add_article() are the
    # paths the corpus is expected to report.
    expected = [mock_jstor.add_article() for _ in range(3)]

    actual = list(JSTOR_Corpus(mock_jstor.path).paths())

    # Same members, and no duplicates or extras.
    assert set(actual) == set(expected)
    assert len(actual) == 3
def test_generate_paths(mock_jstor):
    """
    JSTOR_Corpus#paths() should report every manifest added to the mock.
    """

    # Populate the mock corpus, remembering what each add_article()
    # call returns.
    added = []
    for _ in range(3):
        added.append(mock_jstor.add_article())

    jstor = JSTOR_Corpus(mock_jstor.path)
    generated = list(jstor.paths())

    # The generated paths must match the added ones exactly.
    assert set(generated) == set(added)
    assert len(generated) == 3
# Example #4
# 0
# File: text.py  Project: davidmcclure/osp
    def ingest_jstor(cls):
        """
        Ingest JSTOR records.

        Builds a corpus with JSTOR_Corpus.from_env() and passes each item
        from its texts() iterator to cls.create() as keyword arguments.
        A failure on one item is printed and the loop moves on.
        """

        jstor = JSTOR_Corpus.from_env()

        for index, fields in enumerate(jstor.texts()):

            # Best-effort: report the error and continue with the next
            # record instead of aborting the whole run.
            try:
                cls.create(**fields)
            except Exception as error:
                print(error)

            # Carriage return rewinds the cursor so the running index
            # overwrites itself on a single line.
            progress = '\r' + str(index)
            sys.stdout.write(progress)
            sys.stdout.flush()
def test_ignore_non_xml_files(mock_jstor):
    """
    JSTOR_Corpus#paths() should not return non-XML files.
    """

    # Three articles; the returned values are what paths() is expected
    # to produce.
    xml_paths = []
    for _ in range(3):
        xml_paths.append(mock_jstor.add_article())

    # Write three unrelated files into the first article's directory.
    folder = os.path.dirname(xml_paths[0])

    for suffix in ['js', 'zip', 'txt']:
        stray = os.path.join(folder, 'test.' + suffix)
        with open(stray, 'w') as out:
            print('content', file=out)

    jstor = JSTOR_Corpus(mock_jstor.path)
    listed = list(jstor.paths())

    # The stray files must not appear among the listed paths.
    assert set(listed) == set(xml_paths)
    assert len(listed) == 3
    def ingest_jstor(cls):
        """
        Ingest JSTOR records.

        Each item from JSTOR_Corpus.from_env().texts() is unpacked into
        cls.create(). Exceptions are printed, not raised, so a bad item
        does not stop the loop.
        """

        records = JSTOR_Corpus.from_env().texts()

        for position, record in enumerate(records):

            try:
                cls.create(**record)
            except Exception as exc:
                # Deliberately tolerant: print the problem and keep going.
                print(exc)

            # Show the current index in place using a carriage return.
            sys.stdout.write('\r' + str(position))
            sys.stdout.flush()