def main(path, pynuxrc="~/.pynuxrc", replace=True, loglevel=_loglevel_):
    """Run every stash stage for ``path`` and publish a summary.

    The image, file, thumbnail and media.json stages of ``Stash`` run in
    that order. Progress is logged to stderr; the final summary is printed
    and handed to ``publish_to_harvesting``.

    Args:
        path: passed to ``Stash`` and echoed in the log and in the
            published subject line.
        pynuxrc: passed through to ``Stash``.
        replace: passed through to ``Stash``; the summary reports
            ``stash.replace``.
        loglevel: name of a ``logging`` level constant, in any letter case.

    Raises:
        ValueError: if ``loglevel`` does not name an integer level on the
            ``logging`` module.
    """
    # set up logging
    # Upper-case the name first: a lowercase value such as 'info' would
    # otherwise resolve to the logging.info() function and be rejected.
    numeric_level = getattr(logging, loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % loglevel)

    # log to stdout/err to capture in parent process log
    logging.basicConfig(
        level=numeric_level,
        format='%(asctime)s (%(name)s) [%(levelname)s]: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p',
        stream=sys.stderr)
    logger = logging.getLogger(__name__)
    logger.info('path: {}'.format(path))

    stash = Stash(path, pynuxrc, replace)

    # stash images for use with iiif server
    image_report = stash.images()
    logger.info('finished stashing images')

    # stash text, audio, video
    file_report = stash.files()
    logger.info('finished stashing files')

    # stash thumbnails for text, audio, video
    thumb_report = stash.thumbnails()
    logger.info('finished stashing thumbnails')

    # stash media.json files
    mediajson_report = stash.media_json()
    logger.info('finished stashing media.json')

    def count_stashed(report):
        # An entry counts when its 'stashed' value is truthy.
        return sum(1 for _, entry in report.iteritems() if entry['stashed'])

    # print some information about how it went
    images_stashed = count_stashed(image_report)
    files_stashed = count_stashed(file_report)
    thumbs_stashed = count_stashed(thumb_report)
    mediajson_stashed = count_stashed(mediajson_report)

    # TODO: make sure this is in rqworker log
    summary = ''.join((
        "SUMMARY:\n",
        "objects processed: {}\n".format(len(stash.objects)),
        "replaced existing files on s3: {}\n".format(stash.replace),
        "images stashed: {}\n".format(images_stashed),
        "files stashed: {}\n".format(files_stashed),
        "thumbnails stashed: {}\n".format(thumbs_stashed),
        "media.json files stashed: {}\n".format(mediajson_stashed),
    ))
    print(summary)
    publish_to_harvesting('Deep Harvest for {} done'.format(path), summary)
def stash_file(path, replace=True):
    '''Stash a single file to s3.

    Builds a ``NuxeoStashFile`` for ``path`` with the bucket, region and
    pynuxrc fixed below, calls its ``nxstashref()`` and publishes the
    JSON-encoded report through ``publish_to_harvesting``.

    path -- byte string (decoded as UTF-8) or unicode
    replace -- passed through to ``NuxeoStashFile``
    '''
    bucket = 'ucldc-nuxeo-ref-media'
    region = 'us-west-2'
    pynuxrc = '~/.pynuxrc'

    # byte strings are decoded as UTF-8; unicode input is used unchanged
    if not isinstance(path, unicode):
        path = unicode(path, "utf-8")

    nxstash = NuxeoStashFile(path, bucket, region, pynuxrc, replace)
    report = nxstash.nxstashref()

    # The template must be unicode: formatting a unicode path that holds
    # non-ASCII characters into a byte-string template raises
    # UnicodeEncodeError in Python 2.
    subject = u'Stashed file for {}'.format(path)
    publish_to_harvesting(subject, json.dumps(report))
def stash_thumb(path, replace=True):
    '''Stash single thumb.

    Builds a ``NuxeoStashThumb`` for ``path`` with the bucket, region and
    pynuxrc fixed below, calls its ``nxstashref()`` and publishes the
    JSON-encoded report through ``publish_to_harvesting``.

    path -- byte string (decoded as UTF-8) or unicode
    replace -- passed through to ``NuxeoStashThumb``
    '''
    bucket = 'static.ucldc.cdlib.org/ucldc-nuxeo-thumb-media'
    region = 'us-east-1'
    pynuxrc = '~/.pynuxrc'

    # byte strings are decoded as UTF-8; unicode input is used unchanged
    if not isinstance(path, unicode):
        path = unicode(path, "utf-8")

    nxstash = NuxeoStashThumb(path, bucket, region, pynuxrc, replace)
    report = nxstash.nxstashref()

    # The template must be unicode: formatting a unicode path that holds
    # non-ASCII characters into a byte-string template raises
    # UnicodeEncodeError in Python 2.
    subject = u'Stashed thumb for {}'.format(path)
    publish_to_harvesting(subject, json.dumps(report))
def stash_image(path, replace=True):
    '''Stash a single image to s3.

    Builds a ``NuxeoStashImage`` for ``path`` with the bucket, region and
    pynuxrc fixed below, calls its ``nxstashref()`` and publishes the
    JSON-encoded report through ``publish_to_harvesting``.

    path -- byte string (decoded as UTF-8) or unicode
    replace -- passed through to ``NuxeoStashImage``
    '''
    bucket = 'ucldc-private-files/jp2000'
    region = 'us-west-2'
    pynuxrc = '~/.pynuxrc'

    # byte strings are decoded as UTF-8; unicode input is used unchanged
    if not isinstance(path, unicode):
        path = unicode(path, "utf-8")

    nxstash = NuxeoStashImage(path, bucket, region, pynuxrc, replace)
    report = nxstash.nxstashref()

    # The template must be unicode: formatting a unicode path that holds
    # non-ASCII characters into a byte-string template raises
    # UnicodeEncodeError in Python 2.
    subject = u'Stashed image for {}'.format(path)
    publish_to_harvesting(subject, json.dumps(report))
def stash_media_json(path, replace=True):
    '''Stash media json for object.

    Builds a ``NuxeoStashMediaJson`` for ``path`` with the bucket, region
    and pynuxrc fixed below, calls its ``nxstashref()`` and publishes the
    JSON-encoded report through ``publish_to_harvesting``.

    path -- byte string (decoded as UTF-8) or unicode
    replace -- passed through to ``NuxeoStashMediaJson``
    '''
    bucket = 'static.ucldc.cdlib.org/media_json'
    region = 'us-east-1'
    pynuxrc = '~/.pynuxrc'

    # byte strings are decoded as UTF-8; unicode input is used unchanged
    if not isinstance(path, unicode):
        path = unicode(path, "utf-8")

    nxstash = NuxeoStashMediaJson(path, bucket, region, pynuxrc, replace)
    report = nxstash.nxstashref()

    # The template must be unicode: formatting a unicode path that holds
    # non-ASCII characters into a byte-string template raises
    # UnicodeEncodeError in Python 2.
    subject = u'Stashed media_json for {}'.format(path)
    publish_to_harvesting(subject, json.dumps(report))
def main(path, pynuxrc="~/.pynuxrc", replace=True):
    """Stash images, files, thumbnails and media.json for ``path``.

    Logging goes to ``logs/<basename of path>.log``. After each stage the
    stage report is handed to ``s3_report`` under a name derived from the
    same basename. A summary is printed and sent through
    ``publish_to_harvesting`` at the end.
    """
    # logging
    # FIXME would like to name log with nuxeo UID
    filename = os.path.basename(path)
    logfile = "logs/{}.log".format(filename)
    print("LOG:\t{}".format(logfile))
    logging.basicConfig(
        filename=logfile,
        level=logging.INFO,
        format='%(asctime)s (%(name)s) [%(levelname)s]: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p')
    logger = logging.getLogger(__name__)

    stash = Stash(path, pynuxrc, replace)

    def run_stage(banner, method_name, done, report_template):
        # Announce, run one Stash stage, log/print completion, then pass
        # the stage report to s3_report under its templated name.
        print(banner)
        report = getattr(stash, method_name)()
        logger.info(done)
        print(done)
        report_file = report_template.format(filename)
        s3_report(report_file, report)
        print("report:\t{}\n".format(report_file))
        return report

    # stash images for use with iiif server
    image_report = run_stage(
        'stashing images...', 'images',
        'finished stashing images', "images-{}.json")

    # stash text, audio, video
    file_report = run_stage(
        'stashing non-image files (text, audio, video)...', 'files',
        'finished stashing files', "files-{}.json")

    # stash thumbnails for text, audio, video
    thumb_report = run_stage(
        'stashing thumbnails for non-image files (text, audio, video)...',
        'thumbnails',
        'finished stashing thumbnails', "thumbs-{}.json")

    # stash media.json files
    mediajson_report = run_stage(
        'stashing media.json files for collection...', 'media_json',
        'finished stashing media.json', "mediajson-{}.json")

    def count_stashed(report):
        # An entry counts when its 'stashed' value is truthy.
        return sum(1 for _, entry in report.iteritems() if entry['stashed'])

    # print some information about how it went
    images_stashed = count_stashed(image_report)
    files_stashed = count_stashed(file_report)
    thumbs_stashed = count_stashed(thumb_report)
    mediajson_stashed = count_stashed(mediajson_report)

    # TODO: make sure this is in rqworker log
    summary_lines = [
        "SUMMARY:\n",
        "objects processed: {}\n".format(len(stash.objects)),
        "replaced existing files on s3: {}\n".format(stash.replace),
        "images stashed: {}\n".format(images_stashed),
        "files stashed: {}\n".format(files_stashed),
        "thumbnails stashed: {}\n".format(thumbs_stashed),
        "media.json files stashed: {}\n".format(mediajson_stashed),
    ]
    summary = ''.join(summary_lines)
    print(summary)
    publish_to_harvesting('Deep Harvest for {} done'.format(path), summary)
def main(registry_id, pynuxrc="~/.pynuxrc", replace=True, loglevel=_loglevel_): # set up logging logfile = 'logs/stash_collection_{}'.format(registry_id) numeric_level = getattr(logging, loglevel, None) if not isinstance(numeric_level, int): raise ValueError('Invalid log level: %s' % loglevel) # log to stdout/err to capture in parent process log # TODO: save log to S3 logging.basicConfig( level=numeric_level, format='%(asctime)s (%(name)s) [%(levelname)s]: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', stream=sys.stderr) logger = logging.getLogger(__name__) # get nuxeo path nxpath = s3stash.s3tools.get_nuxeo_path(registry_id) if nxpath is None: print "No record found for registry_id: {}".format(registry_id) sys.exit() info = 'nuxeo_path: {}'.format(nxpath) logger.info(info) print info, '\n' stash = Stash(nxpath, pynuxrc, replace) # stash images for use with iiif server print 'stashing images...' image_report = stash.images() info = 'finished stashing images' logger.info(info) print info report_file = "images-{}.json".format(registry_id) s3_report(report_file, image_report) print "report:\t{}\n".format(report_file) # stash text, audio, video print 'stashing non-image files (text, audio, video)...' file_report = stash.files() info = 'finished stashing files' logger.info(info) print info report_file = "files-{}.json".format(registry_id) s3_report(report_file, file_report) print "report:\t{}\n".format(report_file) # stash thumbnails for text, audio, video print 'stashing thumbnails for non-image files (text, audio, video)...' thumb_report = stash.thumbnails() info = 'finished stashing thumbnails' logger.info(info) print info report_file = "thumbs-{}.json".format(registry_id) s3_report(report_file, thumb_report) print "report:\t{}\n".format(report_file) # stash media.json files print 'stashing media.json files for collection...' 
mediajson_report = stash.media_json() info = 'finished stashing media.json' logger.info(info) print info report_file = "mediajson-{}.json".format(registry_id) s3_report(report_file, mediajson_report) print "report:\t{}\n".format(report_file) # print some information about how it went images_stashed = len( [key for key, value in image_report.iteritems() if value['stashed']]) files_stashed = len( [key for key, value in file_report.iteritems() if value['stashed']]) thumbs_stashed = len( [key for key, value in thumb_report.iteritems() if value['stashed']]) mediajson_stashed = len([ key for key, value in mediajson_report.iteritems() if value['stashed'] ]) # TODO: make sure this is in rqworker log summary = ''.join(( "SUMMARY:\n", "objects processed: {}\n".format(len(stash.objects)), "replaced existing files on s3: {}\n".format(stash.replace), "images stashed: {}\n".format(images_stashed), "files stashed: {}\n".format(files_stashed), "thumbnails stashed: {}\n".format(thumbs_stashed), "media.json files stashed: {}\n".format(mediajson_stashed), )) print summary publish_to_harvesting('Deep Harvest for {} done'.format(registry_id), summary)
def main(registry_id, pynuxrc="~/.pynuxrc", replace=True, loglevel=_loglevel_): # set up logging logfile = 'logs/stash_collection_{}'.format(registry_id) numeric_level = getattr(logging, loglevel, None) if not isinstance(numeric_level, int): raise ValueError('Invalid log level: %s' % loglevel) # log to stdout/err to capture in parent process log # TODO: save log to S3 logging.basicConfig( level=numeric_level, format='%(asctime)s (%(name)s) [%(levelname)s]: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', stream=sys.stderr) logger = logging.getLogger(__name__) # get nuxeo path nxpath = s3stash.s3tools.get_nuxeo_path(registry_id) if nxpath is None: print "No record found for registry_id: {}".format(registry_id) sys.exit() info = 'nuxeo_path: {}'.format(nxpath) logger.info(info) print info, '\n' stash = Stash(nxpath, pynuxrc, replace) # stash images for use with iiif server print 'stashing images...' image_report = stash.images() info = 'finished stashing images' logger.info(info) print info report_file = "images-{}.json".format(registry_id) s3_report(report_file, image_report) print "report:\t{}\n".format(report_file) # stash text, audio, video print 'stashing non-image files (text, audio, video)...' file_report = stash.files() info = 'finished stashing files' logger.info(info) print info report_file = "files-{}.json".format(registry_id) s3_report(report_file, file_report) print "report:\t{}\n".format(report_file) # stash thumbnails for text, audio, video print 'stashing thumbnails for non-image files (text, audio, video)...' thumb_report = stash.thumbnails() info = 'finished stashing thumbnails' logger.info(info) print info report_file = "thumbs-{}.json".format(registry_id) s3_report(report_file, thumb_report) print "report:\t{}\n".format(report_file) # stash media.json files print 'stashing media.json files for collection...' 
mediajson_report = stash.media_json() info = 'finished stashing media.json' logger.info(info) print info report_file = "mediajson-{}.json".format(registry_id) s3_report(report_file, mediajson_report) print "report:\t{}\n".format(report_file) # print some information about how it went images_stashed = len( [key for key, value in image_report.iteritems() if value['stashed']]) files_stashed = len( [key for key, value in file_report.iteritems() if value['stashed']]) thumbs_stashed = len( [key for key, value in thumb_report.iteritems() if value['stashed']]) mediajson_stashed = len([ key for key, value in mediajson_report.iteritems() if value['stashed'] ]) # TODO: make sure this is in rqworker log summary = ''.join(( "SUMMARY:\n", "objects processed: {}\n".format(len(stash.objects)), "replaced existing files on s3: {}\n".format(stash.replace), "images stashed: {}\n".format(images_stashed), "files stashed: {}\n".format(files_stashed), "thumbnails stashed: {}\n".format(thumbs_stashed), "media.json files stashed: {}\n".format(mediajson_stashed), ) ) print summary publish_to_harvesting('Deep Harvest for {} done'.format(registry_id), summary)
def main(path, pynuxrc="~/.pynuxrc", replace=True):
    """Stash images, files, thumbnails and media.json for ``path``.

    Logging goes to ``logs/<basename of path>.log``. After each stage the
    stage report is handed to ``s3_report`` under a name derived from the
    same basename. A summary is printed and sent through
    ``publish_to_harvesting`` at the end.
    """
    # logging
    # FIXME would like to name log with nuxeo UID
    filename = os.path.basename(path)
    logfile = "logs/{}.log".format(filename)
    print("LOG:\t{}".format(logfile))
    logging.basicConfig(
        filename=logfile,
        level=logging.INFO,
        format='%(asctime)s (%(name)s) [%(levelname)s]: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p')
    logger = logging.getLogger(__name__)

    stash = Stash(path, pynuxrc, replace)

    # One row per stage, in execution order:
    # (Stash method name, start banner, completion message, report name).
    stages = (
        # stash images for use with iiif server
        ('images',
         'stashing images...',
         'finished stashing images',
         "images-{}.json"),
        # stash text, audio, video
        ('files',
         'stashing non-image files (text, audio, video)...',
         'finished stashing files',
         "files-{}.json"),
        # stash thumbnails for text, audio, video
        ('thumbnails',
         'stashing thumbnails for non-image files (text, audio, video)...',
         'finished stashing thumbnails',
         "thumbs-{}.json"),
        # stash media.json files
        ('media_json',
         'stashing media.json files for collection...',
         'finished stashing media.json',
         "mediajson-{}.json"),
    )

    reports = {}
    for method_name, banner, done, report_template in stages:
        print(banner)
        reports[method_name] = getattr(stash, method_name)()
        logger.info(done)
        print(done)
        report_file = report_template.format(filename)
        s3_report(report_file, reports[method_name])
        print("report:\t{}\n".format(report_file))

    def count_stashed(report):
        # An entry counts when its 'stashed' value is truthy.
        return sum(1 for _, entry in report.iteritems() if entry['stashed'])

    # print some information about how it went
    images_stashed = count_stashed(reports['images'])
    files_stashed = count_stashed(reports['files'])
    thumbs_stashed = count_stashed(reports['thumbnails'])
    mediajson_stashed = count_stashed(reports['media_json'])

    # TODO: make sure this is in rqworker log
    summary_lines = [
        "SUMMARY:\n",
        "objects processed: {}\n".format(len(stash.objects)),
        "replaced existing files on s3: {}\n".format(stash.replace),
        "images stashed: {}\n".format(images_stashed),
        "files stashed: {}\n".format(files_stashed),
        "thumbnails stashed: {}\n".format(thumbs_stashed),
        "media.json files stashed: {}\n".format(mediajson_stashed),
    ]
    summary = ''.join(summary_lines)
    print(summary)
    publish_to_harvesting('Deep Harvest for {} done'.format(path), summary)