Python Document.create 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: osp.corpus.models.document

클래스/타입: Document

메소드/함수: create

hotexamples.com에서의 예제들: 12

Python Document.create - 12개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 osp.corpus.models.document.Document.create에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

create(12)

select(7)

get(5)

insert_documents(3)

자주 사용되는 메소드들

create (12)

select (7)

get (5)

insert_documents (3)

예제 #1

파일 보기

파일: test_es_insert.py 프로젝트: overview/osp

def test_es_insert(models, config, corpus_index):

    """
    Document_Text.es_insert() should index every text row in Elasticsearch.
    """

    # Index 10 documents; each row's text is the same string as its path.
    for i in range(10):
        doc = Document.create(path=str(i))
        Document_Text.create(document=doc, text=str(i))

    Document_Text.es_insert()

    # Should insert 10 docs.
    assert Document_Text.es_count() == 10

    # For each text row:
    for t in Document_Text.select():

        # A document should exist; it is looked up by the document path.
        doc = config.es.get('osp', t.document.path)

        # Should index the doc ID, and the row's own text as the body
        # (compare against the text, not the path that happens to equal it).
        assert doc['_source']['doc_id'] == t.document.id
        assert doc['_source']['body'] == t.text

예제 #2

파일 보기

파일: test_ext_semester.py 프로젝트: overview/osp

def test_link_with_document(models, mock_osp):

    """
    The semester metadata row produced by the job should reference the
    document whose id was passed in.
    """

    # Two documents, but a text row only for the second one.
    first_doc = Document.create(path='path1')
    second_doc = Document.create(path='path2')
    text_row = Document_Text.create(document=second_doc, text='Fall 2012')

    # Guard: the text row's id and its document's id must differ.
    assert text_row.id != text_row.document.id

    semester_row = ext_semester(second_doc.id)
    assert semester_row.document == second_doc

예제 #3

파일 보기

파일: test_es_doc.py 프로젝트: overview/osp

def test_es_doc(models):

    """
    The `es_doc` attribute of a Document_Text row should expose the fields
    of the Elasticsearch document.
    """

    document = Document.create(path='000/abc')
    text_row = Document_Text.create(document=document, text='text')

    # Field name -> value expected in the Elasticsearch document.
    expected = {
        '_id': '000/abc',
        'doc_id': document.id,
        'body': 'text',
    }

    for field, value in expected.items():
        assert text_row.es_doc[field] == value

예제 #4

파일 보기

파일: test_format_counts.py 프로젝트: overview/osp

def test_format_counts(models):

    """
    Document_Format.format_counts() should return (format, count) pairs.
    The expected list below is in descending count order.
    """

    # 1 doc with 'format1', 2 docs with 'format2', 3 docs with 'format3'.
    formats = ['format1'] + ['format2'] * 2 + ['format3'] * 3

    # One document (paths '1'..'6') and one format row per entry.
    for i, fmt in enumerate(formats, start=1):
        doc = Document.create(path=str(i))
        Document_Format.create(document=doc, format=fmt)

    assert Document_Format.format_counts() == [
        ('format3', 3),
        ('format2', 2),
        ('format1', 1),
    ]

예제 #5

파일 보기

파일: test_institution_counts.py 프로젝트: overview/osp

def test_institution_counts(models):

    """
    Document_Institution.institution_counts() should provide syllabus counts
    for each institution id.
    """

    i1 = Institution.create()
    i2 = Institution.create()
    i3 = Institution.create()

    d1 = Document.create(path='d1')
    d2 = Document.create(path='d2')
    d3 = Document.create(path='d3')
    d4 = Document.create(path='d4')
    d5 = Document.create(path='d5')
    d6 = Document.create(path='d6')

    # 1 document for institution 1.
    Document_Institution.create(institution=i1, document=d1)

    # 2 documents for institution 2.
    Document_Institution.create(institution=i2, document=d2)
    Document_Institution.create(institution=i2, document=d3)

    # 3 documents for institution 3.
    Document_Institution.create(institution=i3, document=d4)
    Document_Institution.create(institution=i3, document=d5)
    Document_Institution.create(institution=i3, document=d6)

    # Counts are keyed by institution id, so use the institutions' ids
    # rather than document ids that merely happen to share the same values.
    assert Document_Institution.institution_counts() == {
        i1.id: 1,
        i2.id: 2,
        i3.id: 3,
    }

예제 #6

파일 보기

파일: conftest.py 프로젝트: overview/osp

    def _doc(content='content'):

        """
        Add a file holding `content`, insert a document row for it, and run
        text extraction on that document.

        Returns the created Document row.
        """

        # Write a file.
        path = mock_osp.add_file(content=content)
        syllabus = Syllabus(path)

        # Insert the document row.
        document = Document.create(path=syllabus.relative_path)

        # Extract text; the return value is not needed here.
        ext_text(document.id)

        return document

예제 #7

파일 보기

파일: test_ext_text.py 프로젝트: samzhang111/osp

def test_text_extraction_fails(models, mock_osp):

    """
    Running extraction against an empty file should leave the
    Document_Text table without any rows.
    """

    # Register a document backed by an empty file.
    empty_path = mock_osp.add_file(content="")
    doc = Document.create(path=empty_path)

    ext_text(doc.id)

    # No text row should have been written.
    row_count = Document_Text.select().count()
    assert row_count == 0

예제 #8

파일 보기

파일: test_ext_format.py 프로젝트: overview/osp

def test_read_format(models, mock_osp):

    """
    ext_format() should write the file's format to the `document_format`
    table.
    """

    # Register a document for a freshly added file.
    file_path = mock_osp.add_file()
    doc = Document.create(path=file_path)

    ext_format(doc.id)

    # Fetch the format row linked to the document.
    format_row = Document_Format.get(Document_Format.document == doc)
    assert format_row.format == 'text/plain'

예제 #9

파일 보기

파일: test_ext_file_metadata.py 프로젝트: overview/osp

    def _ext(ftype):

        """
        Add a file of type `ftype`, create its document row, run the
        file-metadata extractor, and return the first metadata row linked
        to that document.
        """

        # Register the document.
        file_path = mock_osp.add_file(ftype=ftype)
        doc = Document.create(path=file_path)

        # Run the extractor.
        ext_file_metadata(doc.id)

        # Look up the row written for this document.
        query = Document_Date_File_Metadata.select()
        query = query.where(Document_Date_File_Metadata.document == doc)
        return query.first()

예제 #10

파일 보기

파일: test_ext_archive_url.py 프로젝트: overview/osp

    def _ext(url):

        """
        Add a file whose log carries `url`, create its document row, run the
        archive-URL extractor, and return the first row linked to that
        document.
        """

        # Register the document.
        file_path = mock_osp.add_file(log={'url': url})
        doc = Document.create(path=file_path)

        # Run the extractor.
        ext_archive_url(doc.id)

        # Look up the row written for this document.
        query = Document_Date_Archive_Url.select()
        query = query.where(Document_Date_Archive_Url.document == doc)
        return query.first()

예제 #11

파일 보기

파일: test_ext_text.py 프로젝트: samzhang111/osp

def test_text_extraction_succeeds(models, mock_osp):

    """
    ext_text() should extract a document's text and store it in the
    `document_text` table.
    """

    # Register a document whose file contains "text".
    file_path = mock_osp.add_file(content="text")
    doc = Document.create(path=file_path)

    ext_text(doc.id)

    # Fetch the text row linked to the document.
    text_row = Document_Text.get(Document_Text.document == doc)
    assert text_row.text == "text"

예제 #12

파일 보기

파일: test_ext_semester.py 프로젝트: overview/osp

    def _ext(content):

        """
        Add a file holding `content`, create its document row, run text
        extraction followed by semester extraction, and return the first
        semester row linked to that document.
        """

        # Register the document.
        file_path = mock_osp.add_file(content=content)
        doc = Document.create(path=file_path)

        # Text first, then the semester date.
        ext_text(doc.id)
        ext_semester(doc.id)

        # Look up the row written for this document.
        query = Document_Date_Semester.select()
        query = query.where(Document_Date_Semester.document == doc)
        return query.first()