/
datastream_purge.py
41 lines (26 loc) · 1.04 KB
/
datastream_purge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import sys, urllib2, time
from eulfedora.server import Repository
# Connection settings read by main(). The 'xxxx' values are placeholders and
# have to be replaced with real ones before the script is run.
# HOST is interpolated into '%s/fedora/' to form the repository root, so it
# presumably needs to include the URL scheme and host -- TODO confirm.
HOST = 'xxxx'
# Credentials handed to the Repository constructor as username / password.
fedoraUser = 'xxxx'
fedoraPass = 'xxxx'
def main(argv):
    """Purge the "PDF" datastream from every object matched by the query.

    Opens a repository connection rooted at ``<HOST>/fedora/`` using the
    module-level credentials, runs a SPARQL tuple query against the
    repository's resource index, and for each value in each result row:
    strips the ``info:fedora/`` prefix to get a pid, fetches that object,
    purges its "PDF" datastream, saves the object, and prints a
    confirmation line.

    The query selects ``?pid`` for subjects that have the
    ``islandora:pageCModel`` content model and disseminate a datastream
    whose dissemination type is ``info:fedora/*/PDF``.

    :param argv: command-line arguments; accepted but not used.
    :returns: None (implicitly), so the caller's ``sys.exit`` receives None.
    """
    repository = Repository(
        root='%s/fedora/' % HOST,
        username='%s' % fedoraUser,
        password='%s' % fedoraPass,
    )
    query = 'select ?pid where {?pid <fedora-view:disseminates> ?ds . ?pid <fedora-model:hasModel> <info:fedora/islandora:pageCModel> . ?ds <fedora-view:disseminationType> <info:fedora/*/PDF>}'
    rows = repository.risearch.find_statements(
        query, language='sparql', type='tuples', flush=None)

    # Each row is presumably a dict with a single 'pid' column (the only
    # variable the query selects) -- TODO confirm; every value is handled.
    for row in rows:
        for object_uri in row.values():
            pid = object_uri.replace('info:fedora/', '')
            target = repository.get_object(pid)
            # The handle is fetched but not read here; a size tally that
            # summed its .size per object used to follow and is disabled.
            pdf_stream = target.getDatastreamObject("PDF")
            target.api.purgeDatastream(pid, "PDF")
            target.save()
            # Parenthesised form prints the same text under Python 2.
            print("Purged PDF for %s" % pid)
            # NOTE: an optional time.sleep(0.2) throttle between purges
            # was present here but disabled.
# Script entry point: run main() with the raw argument vector and hand its
# return value to sys.exit() as the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)