-
Notifications
You must be signed in to change notification settings - Fork 0
/
allitems.py
71 lines (61 loc) · 2.14 KB
/
allitems.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from dpla.api import DPLA
import fields
"""
allitems
Demonstrate the use of Python generators with the DPLA API
"""
def search(query, api_key, limit=10):
    """Yield search results item by item, fetching pages lazily.

    Result pages are requested from the DPLA API one at a time, only as
    the consumer asks for more items.  Iteration ends after `limit` items
    have been yielded or when the API returns an empty page, whichever
    comes first.  Only the 'sourceResource' field is requested.

    Args:
        query: Search terms, passed through to ``DPLA.search``.
        api_key: API key used to construct the ``DPLA`` client.
        limit: Maximum total number of items to yield.

    Yields:
        Each entry of every result page's ``items``, in the order returned.
    """
    page_size = _page_size(limit)
    dpla = DPLA(api_key)
    page = 0
    yielded = 0
    while yielded < limit:
        page += 1
        result = dpla.search(query, page_size=page_size, page=page,
                             fields=['sourceResource'])
        if not result.items:
            # An empty page means the result set is exhausted.  A plain
            # `return` ends the generator; raising StopIteration here would
            # be turned into RuntimeError by PEP 479 (Python 3.7+).
            return
        for item in result.items:
            yield item
            yielded += 1
            if yielded >= limit:
                # Stop mid-page: when `limit` is not a multiple of the page
                # size, the final page holds more items than were asked for.
                return
def _page_size(limit):
"""Return suitable API page size for desired query limit"""
if limit > 500:
return 500
else:
return limit
def strings_from_field(field_token, search):
    """Yield strings from a given field of every item in a search.

    The field in the MAP hierarchy is specified in dotted notation; for
    example, 'sourceResource.subject.name'.  The dots are replaced with
    underscores and the resulting name is looked up as a function in the
    `fields` module.  That field-parsing function is assumed to be smart
    enough to apply the logic necessary for that specific field.  In this
    example, sourceResource.subject may be a list (JSON array) of
    dictionaries (JSON objects) that have 'name' keys.  That's fine.  Just
    name the property regardless of cardinality.

    Args:
        field_token: Dotted field name, e.g. 'sourceResource.subject.name'.
        search: Iterable of items, such as the generator returned by
            ``search()``.

    Yields:
        Every string found in the value the field function returns for
        each item; nested lists are flattened.  Nothing is yielded when
        `fields` has no function matching `field_token`.

    Example:
        import allitems
        search = allitems.search('fizz', 'YOUR_KEY')
        ff = allitems.strings_from_field('sourceResource.subject.name',
                                         search)
        print("\\n".join(ff))
    """
    # Only the lookup is guarded, so that an AttributeError raised while
    # parsing an item is not mistaken for "no such field" and swallowed.
    try:
        field_func = getattr(fields, field_token.replace('.', '_'))
    except AttributeError:
        # No function matching field_token: end the generator quietly.
        # (`return`, not `raise StopIteration`, which PEP 479 turns into
        # RuntimeError inside a generator on Python 3.7+.)
        return
    for item in search:
        for s in _strings(field_func(item)):
            yield s
def _strings(thing):
"""Yield strings from elements that could be strings or lists of them"""
if isinstance(thing, basestring):
yield thing
elif type(thing) == list:
for el in thing:
for s in _strings(el):
yield s
else:
raise StopIteration