def ingest():
    """
    Load fields by calling Subfield.ingest() with no arguments.
    """
    Subfield.ingest()
def test_clean_field_names():
    """
    Field and subfield names should be sanitized.

    Ingests the clean_field_names.csv fixture, then checks that rows can be
    selected under the names 'Field1' and 'Subfield1' through 'Subfield3'.
    """
    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/clean_field_names.csv',
    )

    field_query = Field.select().where(Field.name == 'Field1')
    assert field_query

    # Each expected subfield name must match at least one row.
    for clean_name in ('Subfield1', 'Subfield2', 'Subfield3'):
        assert Subfield.select().where(Subfield.name == clean_name)
def doc_to_fields(doc_id, radius=100):
    """
    Search for field / department codes in a document.

    Fetches the Document_Text row for the document, runs every subfield's
    search() against its text, and creates a Subfield_Document row for each
    subfield that matches.

    Args:
        doc_id (int): Id of the document to scan.
        radius (int): Number of characters kept on each side of the match
            when cutting the snippet.
    """
    record = Document_Text.get(Document_Text.document == doc_id)

    for candidate in Subfield.select():

        hit = candidate.search(record.text)

        # Nothing found for this subfield; move on to the next one.
        if not hit:
            continue

        # Clamp the snippet window to the bounds of the text.
        lo = max(hit.start() - radius, 0)
        hi = min(hit.end() + radius, len(record.text))

        Subfield_Document.create(
            subfield=candidate,
            document=record.document,
            offset=hit.start(),
            snippet=crunch(record.text[lo:hi]),
        )
def test_clean_field_names():
    """
    Field and subfield names should be sanitized.

    After ingesting the clean_field_names.csv fixture, the field must be
    selectable as 'Field1' and the subfields as 'Subfield1', 'Subfield2'
    and 'Subfield3'.
    """
    package = 'osp.test.fields.models.subfield'
    fixture = 'fixtures/ingest/clean_field_names.csv'
    Subfield.ingest(package, fixture)

    assert Field.select().where(Field.name == 'Field1')

    expected_subfields = ('Subfield1', 'Subfield2', 'Subfield3')
    for expected in expected_subfields:
        matching = Subfield.select().where(Subfield.name == expected)
        assert matching
def doc_to_fields(doc_id, radius=100):
    """
    Search for field / department codes in a document.

    Every subfield is searched for in the document's text; each match is
    stored as a Subfield_Document row holding the match offset and a
    crunch()-ed snippet of the surrounding text.

    Args:
        doc_id (int): Id of the document whose text is searched.
        radius (int): Characters of context kept before and after a match.
    """
    text_row = Document_Text.get(Document_Text.document == doc_id)

    for sf in Subfield.select():

        found = sf.search(text_row.text)

        if found:

            # Snippet bounds, kept inside [0, len(text)].
            begin = max(found.start() - radius, 0)
            end = min(found.end() + radius, len(text_row.text))
            context = text_row.text[begin:end]

            # Link subfield -> document.
            Subfield_Document.create(
                subfield=sf,
                document=text_row.document,
                offset=found.start(),
                snippet=crunch(context),
            )
def test_filter_abbrs():
    """
    Blacklisted abbreviations should be filtered out.

    Ingests the filter_abbrs.csv fixture and checks that each of the three
    subfields ends up with exactly one abbreviation.
    """
    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/filter_abbrs.csv',
    )

    # Fetch all rows first, then compare them against the expected lists.
    names = ('Subfield1', 'Subfield2', 'Subfield3')
    rows = [Subfield.get(Subfield.name == name) for name in names]

    expected = (['SF1'], ['SF2'], ['SF3'])
    for row, abbrs in zip(rows, expected):
        assert row.abbreviations == abbrs
def test_parse_abbrs():
    """
    Abbreviations should be parsed.

    Ingests the parse_abbrs.csv fixture and checks that each of the three
    subfields carries its pair of abbreviations as a list.
    """
    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/parse_abbrs.csv',
    )

    # Fetch all rows first, then compare them against the expected lists.
    names = ('Subfield1', 'Subfield2', 'Subfield3')
    rows = [Subfield.get(Subfield.name == name) for name in names]

    expected = (['AB1', 'AB2'], ['AB3', 'AB4'], ['AB5', 'AB6'])
    for row, abbrs in zip(rows, expected):
        assert row.abbreviations == abbrs
def test_es_insert():
    """
    Subfield_Index.es_insert() should load all fields into Elasticsearch

    For every Subfield row, the document stored under the row's id in the
    'subfield' index must carry the same name.
    """
    Subfield.ingest()
    Subfield_Index.es_insert()

    for row in Subfield.select():
        indexed = config.es.get(index='subfield', id=row.id)
        indexed_name = indexed['_source']['name']
        assert indexed_name == row.name
def test_es_insert():
    """
    Field_Index.es_insert() should load all fields into Elasticsearch

    For every Field row, the document stored under the row's id in the
    'field' index must carry the same name.
    """
    Subfield.ingest()
    Field_Index.es_insert()

    for row in Field.select():
        indexed = config.es.get(index='field', id=row.id)
        indexed_name = indexed['_source']['name']
        assert indexed_name == row.name
def test_parse_abbrs():
    """
    Abbreviations should be parsed.

    After ingesting the parse_abbrs.csv fixture, 'Subfield1' through
    'Subfield3' must each expose their two abbreviations as a list.
    """
    package = 'osp.test.fields.models.subfield'
    fixture = 'fixtures/ingest/parse_abbrs.csv'
    Subfield.ingest(package, fixture)

    first = Subfield.get(Subfield.name == 'Subfield1')
    second = Subfield.get(Subfield.name == 'Subfield2')
    third = Subfield.get(Subfield.name == 'Subfield3')

    checks = (
        (first, ['AB1', 'AB2']),
        (second, ['AB3', 'AB4']),
        (third, ['AB5', 'AB6']),
    )
    for subfield, abbrs in checks:
        assert subfield.abbreviations == abbrs
def test_filter_abbrs():
    """
    Blacklisted abbreviations should be filtered out.

    After ingesting the filter_abbrs.csv fixture, 'Subfield1' through
    'Subfield3' must each keep only a single abbreviation.
    """
    package = 'osp.test.fields.models.subfield'
    fixture = 'fixtures/ingest/filter_abbrs.csv'
    Subfield.ingest(package, fixture)

    first = Subfield.get(Subfield.name == 'Subfield1')
    second = Subfield.get(Subfield.name == 'Subfield2')
    third = Subfield.get(Subfield.name == 'Subfield3')

    checks = (
        (first, ['SF1']),
        (second, ['SF2']),
        (third, ['SF3']),
    )
    for subfield, abbrs in checks:
        assert subfield.abbreviations == abbrs
    def subfield(self):
        """
        Get the document's subfield, if any.

        Joins Subfield through Subfield_Document to Document, keeps the rows
        whose Document.id equals self.document, and takes the first result
        when ordered by ascending Subfield_Document.offset.

        Returns: Subfield
        """
        linked = Subfield.select().join(Subfield_Document).join(Document)
        for_this_doc = linked.where(Document.id == self.document)

        # Lowest offset first.
        by_offset = for_this_doc.order_by(Subfield_Document.offset.asc())

        return by_offset.first()
    def _subfield(
        name='Field',
        abbreviations=None,
        field=None,
    ):
        """
        Create a Subfield row.

        Args:
            name (str)
            abbreviations (list)
            field (Field): Parent field; when falsy, a new Field named
                'Parent' is created and used instead.

        Returns: The result of Subfield.create().
        """
        # Only create the fallback parent when none was supplied.
        parent = field if field else Field.create(name='Parent')

        return Subfield.create(
            name=name,
            abbreviations=abbreviations,
            field=parent,
        )
    def _subfield(
        name='Field',
        abbreviations=None,
        field=None,
    ):
        """
        Build and persist a Subfield via Subfield.create().

        Args:
            name (str)
            abbreviations (list)
            field (Field): Parent field. If falsy, a Field named 'Parent'
                is created on the fly.

        Returns: Whatever Subfield.create() returns.
        """
        attrs = dict(
            name=name,
            abbreviations=abbreviations,
            field=field or Field.create(name='Parent'),
        )

        return Subfield.create(**attrs)
    def es_stream_docs(cls):
        """
        Index subfields.

        Iterates over all Subfield rows, wrapped in query_bar(), and emits
        one document per row containing the row's id and name.

        Yields:
            dict: The next document.
        """
        subfields = query_bar(Subfield.select())

        for subfield in subfields:
            yield {
                '_id': subfield.id,
                'name': subfield.name,
            }
    def es_stream_docs(cls):
        """
        Index subfields.

        Each Subfield row, read through query_bar(), becomes a document
        with an `_id` key (the row id) and a `name` key (the row name).

        Yields:
            dict: The next document.
        """
        for record in query_bar(Subfield.select()):
            document = {'_id': record.id, 'name': record.name}
            yield document
def ingest():
    """
    Load fields.

    Delegates to Subfield.ingest(), called without arguments.
    """
    Subfield.ingest()
def test_insert_rows():
    """
    Subfield.ingest() should load field and subfield rows.

    The insert_rows.csv fixture must produce 3 fields and 9 subfields, with
    'Subfield1'-'Subfield3' under 'Field1', 'Subfield4'-'Subfield6' under
    'Field2' and 'Subfield7'-'Subfield9' under 'Field3'.
    """
    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/insert_rows.csv',
    )

    assert Field.select().count() == 3
    assert Subfield.select().count() == 9

    field_names = ('Field1', 'Field2', 'Field3')
    parents = [Field.get(Field.name == name) for name in field_names]

    subfield_names = (
        'Subfield1', 'Subfield2', 'Subfield3',
        'Subfield4', 'Subfield5', 'Subfield6',
        'Subfield7', 'Subfield8', 'Subfield9',
    )
    children = [Subfield.get(Subfield.name == name) for name in subfield_names]

    # Subfields are listed in groups of three per parent field.
    for position, child in enumerate(children):
        assert child.field == parents[position // 3]
def test_insert_rows():
    """
    Subfield.ingest() should load field and subfield rows.

    After ingesting the insert_rows.csv fixture there must be 3 Field rows
    and 9 Subfield rows, and each consecutive group of three subfields must
    point at 'Field1', 'Field2' and 'Field3' respectively.
    """
    package = 'osp.test.fields.models.subfield'
    fixture = 'fixtures/ingest/insert_rows.csv'
    Subfield.ingest(package, fixture)

    assert Field.select().count() == 3
    assert Subfield.select().count() == 9

    # Load every field, then every subfield, before checking any links.
    fields = [
        Field.get(Field.name == field_name)
        for field_name in ('Field1', 'Field2', 'Field3')
    ]
    subfields = [
        Subfield.get(Subfield.name == subfield_name)
        for subfield_name in (
            'Subfield1', 'Subfield2', 'Subfield3',
            'Subfield4', 'Subfield5', 'Subfield6',
            'Subfield7', 'Subfield8', 'Subfield9',
        )
    ]

    for group, parent in enumerate(fields):
        for subfield in subfields[group * 3:group * 3 + 3]:
            assert subfield.field == parent