def ingest(cls, package="osp.fields", path="data/fields.csv"):
    """
    Load field and subfield rows from a CSV file.

    For every row, the "Primary Field" and "Secondary Field" values are
    cleaned and the "ABBRV" value is parsed. A row with a truthy
    "Alpha Category" value creates a `Field`. A `Subfield` is then
    created only when a `Field` with the cleaned primary name is found.

    Args:
        package (str): Forwarded to `read_csv`.
        path (str): Forwarded to `read_csv`.
    """
    for record in read_csv(package, path):
        # Sanitize field names.
        parent_name = clean_field_name(record["Primary Field"])
        child_name = clean_field_name(record["Secondary Field"])

        # Parse abbreviations; filter only when something was parsed.
        abbreviations = parse_abbrs(record["ABBRV"])
        if abbreviations:
            abbreviations = filter_abbrs(abbreviations)

        # A truthy "Alpha Category" marks a row that introduces a field.
        if record["Alpha Category"]:
            Field.create(name=parent_name)

        # Look up the parent field by its cleaned name.
        matching_fields = Field.select().where(Field.name == parent_name)
        parent = matching_fields.first()

        # Without a parent there is nothing to attach the subfield to.
        if not parent:
            continue

        Subfield.create(
            name=child_name,
            abbreviations=abbreviations,
            field=parent,
        )
def ingest(cls, package='osp.fields', path='data/fields.csv'):
    """
    Read the fields CSV and write field / subfield rows.

    Args:
        package (str): Passed through to `read_csv`.
        path (str): Passed through to `read_csv`.
    """
    rows = read_csv(package, path)

    for entry in rows:

        # Clean the primary and secondary names.
        primary = clean_field_name(entry['Primary Field'])
        secondary = clean_field_name(entry['Secondary Field'])

        # Parsed abbreviations are filtered only when non-empty;
        # a falsy parse result is kept as-is.
        parsed = parse_abbrs(entry['ABBRV'])
        abbreviations = filter_abbrs(parsed) if parsed else parsed

        # Rows flagged by "Alpha Category" create the parent field.
        if entry['Alpha Category']:
            Field.create(name=primary)

        # Attach the subfield to the field with the same cleaned name,
        # if one exists.
        owner = Field.select().where(Field.name == primary).first()
        if owner:
            Subfield.create(
                name=secondary, abbreviations=abbreviations, field=owner
            )
def es_stream_docs(cls):
    """
    Stream one document per field row.

    Iterates `Field.select()` wrapped in `query_bar` and emits the row's
    `id` under `_id` and its `name` under `name`.

    Yields:
        dict: The next document.
    """
    for field in query_bar(Field.select()):
        yield {'_id': field.id, 'name': field.name}
def test_clean_field_names():
    """
    Ingesting the fixture should store sanitized field / subfield names.
    """
    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/clean_field_names.csv',
    )

    # The parent field is stored under its cleaned name.
    assert Field.select().where(Field.name == 'Field1')

    # Every subfield is stored under its cleaned name.
    for cleaned in ('Subfield1', 'Subfield2', 'Subfield3'):
        assert Subfield.select().where(Subfield.name == cleaned)
def test_clean_field_names():
    """
    Names written by Subfield.ingest() should be the sanitized forms.
    """
    fixture = (
        'osp.test.fields.models.subfield',
        'fixtures/ingest/clean_field_names.csv',
    )
    Subfield.ingest(*fixture)

    # Each query must match for the cleaned name to count as stored.
    field_query = Field.select().where(Field.name == 'Field1')
    assert field_query

    first_query = Subfield.select().where(Subfield.name == 'Subfield1')
    assert first_query

    second_query = Subfield.select().where(Subfield.name == 'Subfield2')
    assert second_query

    third_query = Subfield.select().where(Subfield.name == 'Subfield3')
    assert third_query
def test_es_insert():
    """
    Field_Index.es_insert() should load all fields into Elasticsearch.
    """
    Subfield.ingest()
    Field_Index.es_insert()

    # Every field row must be retrievable by id with a matching name.
    for row in Field.select():
        stored = config.es.get(index='field', id=row.id)
        indexed_name = stored['_source']['name']
        assert indexed_name == row.name
def test_insert_rows():
    """
    Subfield.ingest() should write 3 fields and 9 subfields, with each
    run of three subfields belonging to the same field.
    """
    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/insert_rows.csv',
    )

    assert Field.select().count() == 3
    assert Subfield.select().count() == 9

    field_names = ('Field1', 'Field2', 'Field3')
    subfield_names = (
        'Subfield1', 'Subfield2', 'Subfield3',
        'Subfield4', 'Subfield5', 'Subfield6',
        'Subfield7', 'Subfield8', 'Subfield9',
    )

    # Fetch every row up front, fields first, then subfields.
    parents = [Field.get(Field.name == name) for name in field_names]
    children = [
        Subfield.get(Subfield.name == name) for name in subfield_names
    ]

    # Subfields 1-3 -> Field1, 4-6 -> Field2, 7-9 -> Field3.
    for position, child in enumerate(children):
        assert child.field == parents[position // 3]
def test_insert_rows():
    """
    Ingesting the fixture should create the field and subfield rows and
    link each subfield to its parent field.
    """
    fixture_package = 'osp.test.fields.models.subfield'
    fixture_path = 'fixtures/ingest/insert_rows.csv'
    Subfield.ingest(fixture_package, fixture_path)

    assert Field.select().count() == 3
    assert Subfield.select().count() == 9

    # Load the three parent fields.
    first, second, third = [
        Field.get(Field.name == name)
        for name in ('Field1', 'Field2', 'Field3')
    ]

    # Load the nine subfields in name order.
    subfields = [
        Subfield.get(Subfield.name == name)
        for name in (
            'Subfield1', 'Subfield2', 'Subfield3',
            'Subfield4', 'Subfield5', 'Subfield6',
            'Subfield7', 'Subfield8', 'Subfield9',
        )
    ]

    # Expected parent for each subfield, in the same order.
    expected_parents = [first] * 3 + [second] * 3 + [third] * 3

    for subfield, parent in zip(subfields, expected_parents):
        assert subfield.field == parent