forked from wcaleb/omekadd
/
omekaclient.py
executable file
·254 lines (215 loc) · 10.7 KB
/
omekaclient.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import httplib2
import urllib
import mimetypes
import json
import os
import re
from omekautils import create_null_logger
class OmekaItem:
    """Holder for item data.

    Exposes a single attribute, ``data``, which starts out as an empty dict
    and is filled in by the caller.
    """

    def __init__(self):
        # Each instance gets its own fresh dict (never shared between items).
        self.data = dict()
class OmekaElement:
    """Holder for element data.

    Exposes a single attribute, ``data``, which starts out as an empty dict
    and is filled in by the caller.
    """

    def __init__(self):
        # Each instance gets its own fresh dict (never shared between elements).
        self.data = dict()
class OmekaCollection:
    """Holder for collection data.

    Exposes a single attribute, ``data``, which starts out as an empty dict
    and is filled in by the caller.
    """

    def __init__(self):
        # Each instance gets its own fresh dict (never shared between collections).
        self.data = dict()
class OmekaClient:
    """Small client for an Omeka REST API endpoint.

    Wraps an ``httplib2.Http`` object, appends the API key (when one is given)
    to every request, follows ``rel="next"`` pagination links, and keeps
    per-instance caches of the ids it has looked up (element sets, elements,
    collections, vocabularies, relation properties and item types).

    The HTTP helpers (``get``, ``post``, ``put``, ``delete``) return the
    ``(response, content)`` pair produced by ``httplib2``; ``content`` is the
    raw (JSON) body, not a parsed object.
    """

    def __init__(self, endpoint, logger=None, key=None):
        """Create a client and look up the two standard element-set ids.

        endpoint -- base URL of the API; resource names are appended to it
        logger   -- logger to use; a null logger is created when omitted
        key      -- API key sent as the ``key`` query parameter, or None

        Note that construction performs two API requests (the look-ups of the
        "Dublin Core" and "Omeka Metadata" element sets).
        """
        self._endpoint = endpoint
        self._key = key
        self._http = httplib2.Http()
        # Logger and caches are set up *before* the first API call so that no
        # helper can run against a half-initialised instance.
        if logger is None:
            self.logger = create_null_logger("OmekaClient")
        else:
            self.logger = logger
        self.sets = {}  # element sets keyed by name
        self.elements = {}  # elements keyed by name, then element-set id
        self.collections = {}  # collections keyed by Title
        self.vocabs = {}  # vocabularies keyed by namespace prefix
        self.relation_properties = {}  # Item Relations properties keyed by vocab id, then label
        self.types = {}  # item types keyed by name
        self.dublinCoreID = self.getSetId("Dublin Core")
        self.omekaMetadataID = self.getSetId("Omeka Metadata")

    def addItemRelation(self, subject_id, property_id, object_id):
        """Relate two items unless the same relation already exists.

        The existence check is done client-side (a GET with the relation as
        the query) until it can be baked into the API. Returns None.
        """
        relation_data = {"subject_item_id": subject_id,
                         "object_item_id": object_id,
                         "property_id": property_id}
        response, content = self.get('item_relations', query=relation_data)
        if json.loads(content):
            self.logger.info("Already related")
            return
        response, content = self.post('item_relations', json.dumps(relation_data))
        self.logger.info("Response = %s, content = %s", response, content)

    def getItemTypeId(self, name, create=False):
        """Find an item_type id by name and cache the result.

        When the type does not exist and ``create`` is true, a new item type
        is POSTed and its id returned; otherwise None is returned.
        """
        if name in self.types:
            return self.types[name]["id"]
        response, content = self.get('item_types', query={"name": name})
        types_data = json.loads(content)
        if types_data != []:
            self.types[name] = types_data[0]
            return types_data[0]["id"]
        if not create:
            return None
        self.logger.info("Item type %s not found, attempting to make one", name)
        response, content = self.post('item_types', json.dumps({"name": name}))
        types_data = json.loads(content)
        # Was a bare debug ``print``; routed through the logger instead.
        self.logger.info("Created item type: %s", types_data)
        self.types[name] = types_data
        return types_data["id"]

    def getVocabularyId(self, name):
        """Find the id of a vocabulary by its namespace prefix (eg dcterms).

        Results are cached; returns None when no vocabulary matches.
        """
        if name not in self.vocabs:
            response, content = self.get('item_relations_vocabularies',
                                         query={"namespace_prefix": name})
            res = json.loads(content)
            if not res:
                return None
            self.vocabs[name] = res[0]
        return self.vocabs[name]["id"]

    def getRelationPropertyId(self, prefix, label):
        """Find the id of an Item Relations property.

        prefix -- namespace prefix of the vocabulary (eg dcterms)
        label  -- label of the property inside that vocabulary

        Results are cached per vocabulary id and label. Returns None when
        either the vocabulary or the property cannot be found.
        """
        vocab_id = self.getVocabularyId(prefix)
        if vocab_id is None:
            return None
        properties = self.relation_properties.setdefault(vocab_id, {})
        if label not in properties:
            response, content = self.get('item_relations_properties',
                                         query={"label": label, "vocabulary_id": vocab_id})
            res = json.loads(content)
            if not res:
                return None
            properties[label] = res[0]
        return properties[label]["id"]

    def getSetId(self, name, create=False):
        """Find an Omeka element_set by name and cache the result.

        When the set does not exist and ``create`` is true, it is POSTed and
        the id of the new set returned; otherwise None is returned.
        """
        if name not in self.sets:
            response, content = self.get('element_sets', query={"name": name})
            res = json.loads(content)
            if res:
                set_data = res[0]
            elif create:
                response, content = self.post('element_sets', json.dumps({"name": name}))
                set_data = json.loads(content)
            else:
                return None
            self.sets[name] = set_data
        return self.sets[name]["id"]

    def getElementId(self, set_id, name, create=False):
        """Find an element by name within an element set and cache the result.

        The cache is keyed by element name, then element-set id. When the
        element does not exist and ``create`` is true, it is POSTed. Returns
        the element id, or None when there is no usable element.
        """
        # The cache must be checked per (name, set_id): checking the name
        # alone made a same-named element from another set hide this lookup.
        if set_id not in self.elements.get(name, {}):
            response, content = self.get('elements',
                                         query={"name": name, "element_set": set_id})
            res = json.loads(content)
            el_data = None
            if res:
                el_data = res[0]
            elif create:
                response, content = self.post(
                    'elements',
                    json.dumps({"name": name, "element_set": {"id": set_id}}))
                self.logger.info("Trying to make an element %s %s", response, content)
                el_data = json.loads(content)
            if el_data is not None:
                self.elements.setdefault(name, {})[set_id] = el_data
        el_data = self.elements.get(name, {}).get(set_id)
        if el_data is not None and "id" in el_data:
            return el_data["id"]
        return None

    def getCollectionId(self, name, create=False):
        """Find an Omeka collection by name and cache the results.

        All collections are fetched on first use and indexed by their Title
        element text. Does not deal with collections sharing the same title.
        When the collection is unknown and ``create`` is true, a collection
        with that Title is POSTed. Returns the collection id or None.
        """
        def remember(collection):
            # Index the collection under each of its Title texts.
            for element_text in collection['element_texts']:
                if element_text['element']['name'] == 'Title':
                    self.collections[element_text['text']] = collection

        if not self.collections:
            response, content = self.get('collections')
            for collection in json.loads(content):
                remember(collection)
        if name not in self.collections and create:
            title_id = self.getElementId(self.dublinCoreID, "Title")
            element_text = {"html": False, "text": name}
            element_text["element"] = {"id": title_id}
            response, content = self.post('collections',
                                          json.dumps({"element_texts": [element_text]}))
            remember(json.loads(content))
        if name in self.collections:
            return self.collections[name]["id"]
        return None

    def get(self, resource, id=None, query=None):
        """GET ``resource`` (optionally a single ``id``); returns (response, content)."""
        return self._request("GET", resource, id=id, query=query)

    def post(self, resource, data, query=None, headers=None):
        """POST ``data`` to ``resource``; returns (response, content)."""
        return self._request("POST", resource, data=data, query=query, headers=headers)

    def post_file_from_filename(self, file, id):
        """Attach the file at path ``file`` to the item with the given ``id``.

        The upload is skipped (with a warning) when the item already has an
        attachment with the same size and original filename. Returns the
        (response, content) pair of the upload, or None when nothing was
        uploaded (duplicate, or the file does not exist).
        """
        if not os.path.exists(file):
            # ``self.error`` does not exist on this class; log via the logger.
            self.logger.error("File %s not found", file)
            return None
        size = os.path.getsize(file)
        filename = os.path.split(file)[-1]
        res, content = self.get("files", query={"item": id})
        for attachment in json.loads(content):
            if attachment["size"] == size and attachment["original_filename"] == filename:
                self.logger.warning("********** There is already a %d byte file named %s, not uploading *******", size, filename)
                return None
        uploadmeta = json.dumps({"item": {"id": id}})
        # Binary mode: uploads are arbitrary files and must not go through
        # newline translation or text decoding.
        with open(file, 'rb') as f:
            content = f.read()
        return self.post_file(uploadmeta, filename, content)

    def put(self, resource, id, data, query=None):
        """PUT ``data`` to ``resource``/``id``; returns (response, content)."""
        return self._request("PUT", resource, id, data=data, query=query)

    def delete(self, resource, id, query=None):
        """DELETE ``resource``/``id``; returns (response, content)."""
        return self._request("DELETE", resource, id, query=query)

    def post_file(self, data, filename, contents):
        """POST a file to the ``files`` resource as multipart/form-data.

        data     -- JSON metadata (a string)
        filename -- name to report for the uploaded file
        contents -- the file contents (bytes, or text which is UTF-8 encoded)

        Returns the (response, content) pair of the POST.
        """
        BOUNDARY = '----------E19zNvXGzXaLvS5C'
        CRLF = b'\r\n'

        def to_bytes(value):
            # The body is assembled as bytes so that text fields can be
            # combined with arbitrary binary file contents.
            if isinstance(value, bytes):
                return value
            return value.encode('utf-8')

        headers = {'Content-Type': 'multipart/form-data; boundary=' + BOUNDARY}
        parts = [
            '--' + BOUNDARY,
            'Content-Disposition: form-data; name="data"',
            '',
            data,
            '--' + BOUNDARY,
            'Content-Disposition: form-data; name="file"; filename="%s"' % filename,
            'Content-Type: %s' % self.get_content_type(filename),
            '',
            contents,
            # NOTE(review): RFC 2046 closes a multipart body with
            # '--' + BOUNDARY + '--'; the delimiter is kept as it was because
            # the server's handling of a change here has not been verified.
            '--' + BOUNDARY,
        ]
        body = CRLF.join([to_bytes(part) for part in parts])
        headers['content-length'] = str(len(body))
        return self.post("files", body, {}, headers)

    def get_content_type(self, filename):
        """Use mimetypes to detect the type of the file to be uploaded.

        Falls back to 'application/octet-stream' when the type is unknown.
        """
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

    def _request(self, method, resource, id=None, data=None, query=None, headers=None):
        """Send one API request and follow ``rel="next"`` pagination links.

        Builds ``endpoint/resource[/id]?query`` (adding the API key when one
        is configured) and issues it through httplib2. When the response
        carries a ``link`` header with a ``rel="next"`` entry that has both
        ``page`` and ``per_page``, the next page is requested recursively and
        the JSON bodies are concatenated.

        Returns (response, content). ``query`` is never modified.
        """
        try:
            from urllib import urlencode  # Python 2
        except ImportError:
            from urllib.parse import urlencode  # Python 3

        # Work on a private copy: writing the key / page numbers into the
        # caller's dict leaked them into later requests and request bodies.
        params = dict(query) if query else {}
        url = self._endpoint + "/" + resource
        if id is not None:
            url += "/" + str(id)
        if self._key is not None:
            params["key"] = self._key
        url += "?" + urlencode(params)
        resp, content = self._http.request(url, method, body=data, headers=headers)
        links = resp['link'] if 'link' in resp else ""
        for link in links.split(", "):
            link_parts = link.split("; ")
            if link_parts[-1] != 'rel="next"':
                continue
            pages = re.findall(r'\Wpage=(\d+)', link_parts[0])
            per_pages = re.findall(r'\Wper_page=(\d+)', link_parts[0])
            page = int(pages[0]) if len(pages) > 0 else None
            per_page = int(per_pages[0]) if len(per_pages) > 0 else None
            if page and per_page:
                next_params = dict(params)
                next_params['page'] = page
                next_params['per_page'] = per_page
                resp, cont = self._request(method, resource, id, data, next_params, headers)
                content = json.dumps(json.loads(content) + json.loads(cont))
        # Returns strings - this is not ideal but to fix would require a breaking change
        return resp, content