def doc_to_fields(doc_id, radius=100):
    """
    Link a document to every subfield whose search finds a hit in its text.

    For each subfield that matches, one Subfield_Document row is created
    holding the match offset and a snippet of the surrounding text.

    Args:
        doc_id (int): Value compared against Document_Text.document to
            look up the text row.
        radius (int): Number of characters of context kept on each side
            of the match when cutting the snippet.
    """

    row = Document_Text.get(Document_Text.document == doc_id)
    text = row.text

    for subfield in Subfield.select():

        hit = subfield.search(text)

        # No occurrence of this subfield -> nothing to link.
        if not hit:
            continue

        # Context window around the hit, clamped to the text bounds.
        lo = max(0, hit.start() - radius)
        hi = min(len(text), hit.end() + radius)

        Subfield_Document.create(
            subfield=subfield,
            document=row.document,
            offset=hit.start(),
            snippet=crunch(text[lo:hi]),
        )
def doc_to_fields(doc_id, radius=100):
    """
    Scan a document's text for subfield matches and record each one.

    Every subfield returned by Subfield.select() is searched for in the
    text; a match produces a Subfield_Document row with the match offset
    and a snippet cut from around the match.

    Args:
        doc_id (int): Value matched against Document_Text.document.
        radius (int): How many characters to keep before and after the
            match in the stored snippet.
    """

    text_row = Document_Text.get(Document_Text.document == doc_id)
    body = text_row.text

    for subfield in Subfield.select():

        found = subfield.search(body)

        # Skip subfields that do not occur in this document.
        if not found:
            continue

        # Clamp the snippet window so it never runs off either end.
        window = slice(
            max(found.start() - radius, 0),
            min(found.end() + radius, len(body)),
        )

        link = dict(
            subfield=subfield,
            document=text_row.document,
            offset=found.start(),
            snippet=crunch(body[window]),
        )

        Subfield_Document.create(**link)
예제 #3
0
def test_unique_pairs(add_subfield, add_doc):
    """
    A second link for an already-linked (subfield, document) pair must be
    rejected with an IntegrityError.
    """

    field = add_subfield()
    doc = add_doc()

    # The first link for the pair is created normally.
    Subfield_Document.create(
        subfield=field,
        document=doc,
        offset=1,
        snippet='abc',
    )

    # Same pair again, with a different offset and snippet, must fail.
    with pytest.raises(IntegrityError):
        Subfield_Document.create(
            subfield=field,
            document=doc,
            offset=2,
            snippet='def',
        )
    def _subfield_document(
        subfield=None,
        document=None,
        snippet='field',
        offset=100,
    ):
        """
        Create and return a Subfield_Document row.

        A missing (falsy) subfield or document is replaced by a fresh one
        from add_subfield() / add_doc() respectively.
        """

        # Fall back to freshly created rows for anything not supplied.
        subfield = subfield or add_subfield()
        document = document or add_doc()

        return Subfield_Document.create(
            subfield=subfield,
            document=document,
            offset=offset,
            snippet=snippet,
        )
예제 #5
0
    def _subfield_document(
        subfield=None,
        document=None,
        snippet='field',
        offset=100,
    ):
        """
        Build a Subfield_Document row, filling in any missing side of the
        link.

        When subfield or document is not given (falsy), add_subfield() or
        add_doc() is called to supply it. Returns the created row.
        """

        row = dict(
            # Evaluated in order: subfield default first, then document.
            subfield=subfield if subfield else add_subfield(),
            document=document if document else add_doc(),
            offset=offset,
            snippet=snippet,
        )

        return Subfield_Document.create(**row)