def main(argv):
    """Delete files from an item, as directed by the command line.

    ``argv`` is parsed with docopt against this module's docstring. With
    ``--all`` every file returned by ``item.files()`` is deleted and
    cascade deletion is forced on; otherwise only the files named by the
    ``<file>`` arguments are deleted and ``--cascade`` is honoured as
    given.

    Files whose names end in one of the protected suffixes are skipped.
    The process exits with status 1 as soon as a requested file cannot be
    found or a delete request returns a status code other than 204.

    :param argv: Argument list handed to ``docopt``.
    """
    args = docopt(__doc__, argv=argv)
    verbose = args['--verbose']
    item = Item(args['<identifier>'])

    # Files that cannot be deleted via S3.
    no_delete = ('_meta.xml', '_files.xml', '_meta.sqlite')

    if verbose:
        stdout.write('Deleting files from {0}\n'.format(item.identifier))

    # Keep the requested name next to each file object so that a failed
    # lookup (a falsy result) can still be reported by name.
    if args['--all']:
        targets = [(getattr(f, 'name', None), f) for f in item.files()]
        # Deleting everything implies cascading the delete.
        cascade = True
    else:
        targets = [(name, item.file(name)) for name in args['<file>']]
        cascade = args['--cascade']

    for name, f in targets:
        if not f:
            if verbose:
                stderr.write(' error: "{0}" does not exist\n'.format(name))
            exit(1)
        if f.name.endswith(no_delete):
            continue
        resp = f.delete(verbose=verbose, cascade_delete=cascade)
        if resp.status_code != 204:
            # The error body is XML; report the text of its <Message> element.
            error = parseString(resp.content)
            msg = get_xml_text(error.getElementsByTagName('Message'))
            stderr.write(' error: {0} ({1})\n'.format(msg, resp.status_code))
            exit(1)
def _metadata_getter(self):
    """Worker loop: turn queued identifiers into ``Item`` objects.

    Repeatedly takes an ``(index, identifier)`` pair from
    ``self.input_queue``, builds an ``Item`` for the identifier and puts
    ``(index, item)`` on ``self.json_queue``. If building or enqueueing
    the item raises, the same pair is put back on the input queue so it
    is attempted again. ``task_done()`` is called once for every ``get()``
    regardless of outcome.

    The loop never returns on its own.
    """
    while True:
        i, identifier = self.input_queue.get()
        try:
            item = Item(identifier)
            self.json_queue.put((i, item))
        except Exception:
            # Requeue the work for another attempt. Only ordinary errors
            # are caught, so SystemExit/KeyboardInterrupt still propagate
            # instead of being turned into a silent retry.
            self.input_queue.put((i, identifier))
        finally:
            # Balance the get() above, whether or not the work succeeded.
            self.input_queue.task_done()
#!/usr/bin/env python

import os
import json
import ptree

from internetarchive import search_items, Item

# Running total of the sizes reported for every file of every item visited.
total_bytes = 0

for result in search_items('collection:usda-nurseryandseedcatalog'):
    # Named `identifier` rather than `id` so the builtin is not shadowed.
    identifier = result['identifier']
    item = Item(identifier)
    metadata = item.get_metadata()

    # Each item gets its own directory under items/, derived from its
    # identifier by ptree; the leading "/" is stripped so the join stays
    # relative to items/.
    item_dir = os.path.join('items', ptree.id2ptree(identifier).lstrip("/"))
    if not os.path.isdir(item_dir):
        os.makedirs(item_dir)

    with open(os.path.join(item_dir, 'metadata.json'), 'w') as fh:
        fh.write(json.dumps(metadata, indent=2))

    total_bytes += sum(f.size for f in item.iter_files())

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(item_dir)

print(total_bytes)
#!/usr/bin/env python

"""
fetch.py will fetch metadata for the Seed Catalog collection at
Internet Archive.
"""

import os
import json
import ptree

from internetarchive import search_items, Item

for result in search_items('collection:usda-nurseryandseedcatalog'):
    # Named `identifier` rather than `id` so the builtin is not shadowed.
    identifier = result['identifier']
    item = Item(identifier)
    metadata = item.get_metadata()

    # Each item gets its own directory under items/, derived from its
    # identifier by ptree; the leading "/" is stripped so the join stays
    # relative to items/.
    item_dir = os.path.join('items', ptree.id2ptree(identifier).lstrip("/"))
    if not os.path.isdir(item_dir):
        os.makedirs(item_dir)

    with open(os.path.join(item_dir, 'metadata.json'), 'w') as fh:
        fh.write(json.dumps(metadata, indent=2))

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(item_dir)