Ejemplo n.º 1
0
def main(path, pynuxrc="~/.pynuxrc", replace=True, loglevel=_loglevel_):
    '''Run every stash stage for one path and publish a summary.

    The stages run in a fixed order (images, files, thumbnails,
    media.json).  Afterwards a summary with the number of report entries
    flagged 'stashed' per stage is printed and sent through
    publish_to_harvesting.

    path     -- handed to Stash; also appears in the log and in the
                subject of the published message
    pynuxrc  -- handed to Stash unchanged
    replace  -- handed to Stash unchanged; the summary echoes
                stash.replace
    loglevel -- name of a logging level ('INFO', 'debug', ...); the
                lookup is case-insensitive

    Raises ValueError if loglevel does not name an integer attribute of
    the logging module.
    '''
    # set up logging
    # Level constants are upper-case attributes of the logging module, so
    # normalise the case before the lookup ('info' -> logging.INFO).
    numeric_level = getattr(logging, loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % loglevel)
    # log to stdout/err to capture in parent process log
    logging.basicConfig(
        level=numeric_level,
        format='%(asctime)s (%(name)s) [%(levelname)s]: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p',
        stream=sys.stderr)
    logger = logging.getLogger(__name__)
    logger.info('path: {}'.format(path))

    stash = Stash(path, pynuxrc, replace)

    def count_stashed(report):
        '''Number of report entries whose 'stashed' flag is truthy.'''
        return sum(
            1 for _key, entry in report.iteritems() if entry['stashed'])

    # (Stash method name, message logged once the stage has returned)
    stages = (
        # images for use with iiif server
        ('images', 'finished stashing images'),
        # text, audio, video
        ('files', 'finished stashing files'),
        # thumbnails for text, audio, video
        ('thumbnails', 'finished stashing thumbnails'),
        # media.json files
        ('media_json', 'finished stashing media.json'),
    )
    reports = []
    for method_name, done_message in stages:
        # Look the method up only when its turn comes, so a stage runs
        # exactly when (and only if) the previous ones have returned.
        reports.append(getattr(stash, method_name)())
        logger.info(done_message)

    # Count only after every stage has run, in the same order as the
    # stages table above.
    images_stashed, files_stashed, thumbs_stashed, mediajson_stashed = [
        count_stashed(report) for report in reports]

    # TODO: make sure this is in rqworker log
    summary = ''.join((
        "SUMMARY:\n",
        "objects processed:              {}\n".format(len(stash.objects)),
        "replaced existing files on s3:  {}\n".format(stash.replace),
        "images stashed:                 {}\n".format(images_stashed),
        "files stashed:                  {}\n".format(files_stashed),
        "thumbnails stashed:             {}\n".format(thumbs_stashed),
        "media.json files stashed:       {}\n".format(mediajson_stashed),
    ))
    print(summary)
    publish_to_harvesting('Deep Harvest for {} done'.format(path), summary)
def stash_file(path, replace=True):
    '''Stash a single file to s3 and publish the resulting report.

    path    -- byte strings are decoded as UTF-8, unicode is used as is;
               the result is handed to NuxeoStashFile
    replace -- handed to NuxeoStashFile unchanged

    The report returned by nxstashref() is JSON-encoded and sent through
    publish_to_harvesting.
    '''
    bucket = 'ucldc-nuxeo-ref-media'
    region = 'us-west-2'
    pynuxrc = '~/.pynuxrc'
    if not isinstance(path, unicode):
        path = unicode(path, "utf-8")
    nxstash = NuxeoStashFile(path, bucket, region, pynuxrc, replace)
    report = nxstash.nxstashref()
    # path is unicode at this point, so the template has to be unicode
    # too: a byte-string template makes Python 2 encode path as ASCII and
    # raise UnicodeEncodeError for any non-ASCII path.
    # NOTE(review): the subject is now a unicode object -- confirm that
    # publish_to_harvesting accepts one.
    subject = u'Stashed file for {}'.format(path)
    publish_to_harvesting(subject, json.dumps(report))
def stash_thumb(path, replace=True):
    '''Stash a single thumb and publish the resulting report.

    path    -- byte strings are decoded as UTF-8, unicode is used as is;
               the result is handed to NuxeoStashThumb
    replace -- handed to NuxeoStashThumb unchanged

    The report returned by nxstashref() is JSON-encoded and sent through
    publish_to_harvesting.
    '''
    bucket = 'static.ucldc.cdlib.org/ucldc-nuxeo-thumb-media'
    region = 'us-east-1'
    pynuxrc = '~/.pynuxrc'
    if not isinstance(path, unicode):
        path = unicode(path, "utf-8")
    nxstash = NuxeoStashThumb(path, bucket, region, pynuxrc, replace)
    report = nxstash.nxstashref()
    # path is unicode at this point, so the template has to be unicode
    # too: a byte-string template makes Python 2 encode path as ASCII and
    # raise UnicodeEncodeError for any non-ASCII path.
    # NOTE(review): the subject is now a unicode object -- confirm that
    # publish_to_harvesting accepts one.
    subject = u'Stashed thumb for {}'.format(path)
    publish_to_harvesting(subject, json.dumps(report))
def stash_image(path, replace=True):
    '''Stash a single image to s3 and publish the resulting report.

    path    -- byte strings are decoded as UTF-8, unicode is used as is;
               the result is handed to NuxeoStashImage
    replace -- handed to NuxeoStashImage unchanged

    The report returned by nxstashref() is JSON-encoded and sent through
    publish_to_harvesting.
    '''
    bucket = 'ucldc-private-files/jp2000'
    region = 'us-west-2'
    pynuxrc = '~/.pynuxrc'
    if not isinstance(path, unicode):
        path = unicode(path, "utf-8")
    nxstash = NuxeoStashImage(path, bucket, region, pynuxrc, replace)
    report = nxstash.nxstashref()
    # path is unicode at this point, so the template has to be unicode
    # too: a byte-string template makes Python 2 encode path as ASCII and
    # raise UnicodeEncodeError for any non-ASCII path.
    # NOTE(review): the subject is now a unicode object -- confirm that
    # publish_to_harvesting accepts one.
    subject = u'Stashed image for {}'.format(path)
    publish_to_harvesting(subject, json.dumps(report))
def stash_media_json(path, replace=True):
    '''Stash media json for object and publish the resulting report.

    path    -- byte strings are decoded as UTF-8, unicode is used as is;
               the result is handed to NuxeoStashMediaJson
    replace -- handed to NuxeoStashMediaJson unchanged

    The report returned by nxstashref() is JSON-encoded and sent through
    publish_to_harvesting.
    '''
    bucket = 'static.ucldc.cdlib.org/media_json'
    region = 'us-east-1'
    pynuxrc = '~/.pynuxrc'
    if not isinstance(path, unicode):
        path = unicode(path, "utf-8")
    nxstash = NuxeoStashMediaJson(path, bucket, region, pynuxrc, replace)
    report = nxstash.nxstashref()
    # path is unicode at this point, so the template has to be unicode
    # too: a byte-string template makes Python 2 encode path as ASCII and
    # raise UnicodeEncodeError for any non-ASCII path.
    # NOTE(review): the subject is now a unicode object -- confirm that
    # publish_to_harvesting accepts one.
    subject = u'Stashed media_json for {}'.format(path)
    publish_to_harvesting(subject, json.dumps(report))
Ejemplo n.º 6
0
def main(path, pynuxrc="~/.pynuxrc", replace=True):
    '''Run every stash stage for one path, saving a report per stage.

    Logging goes to logs/<basename of path>.log at INFO level.  Each
    stage (images, files, thumbnails, media.json) is announced on stdout,
    run, and its report handed to s3_report under the name
    '<stage>-<basename>.json'.  Finally a summary with the number of
    report entries flagged 'stashed' per stage is printed and sent
    through publish_to_harvesting.

    path    -- handed to Stash; its last component names the log file and
               the report files
    pynuxrc -- handed to Stash unchanged
    replace -- handed to Stash unchanged; the summary echoes stash.replace
    '''
    # logging
    # FIXME would like to name log with nuxeo UID
    # Drop trailing slashes first: basename('a/b/') is '' and would yield
    # 'logs/.log' and report names such as 'images-.json'.
    filename = os.path.basename(path.rstrip('/'))
    logfile = "logs/{}.log".format(filename)
    print("LOG:\t{}".format(logfile))
    # basicConfig(filename=...) opens the log file right away and fails
    # when its directory is missing, so create the directory on demand.
    log_dir = os.path.dirname(logfile)
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    logging.basicConfig(
        filename=logfile,
        level=logging.INFO,
        format='%(asctime)s (%(name)s) [%(levelname)s]: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p')
    logger = logging.getLogger(__name__)

    stash = Stash(path, pynuxrc, replace)

    def count_stashed(report):
        '''Number of report entries whose 'stashed' flag is truthy.'''
        return sum(
            1 for _key, entry in report.iteritems() if entry['stashed'])

    # (Stash method, report file prefix, announcement, completion message)
    stages = (
        # images for use with iiif server
        ('images', 'images',
         'stashing images...',
         'finished stashing images'),
        # text, audio, video
        ('files', 'files',
         'stashing non-image files (text, audio, video)...',
         'finished stashing files'),
        # thumbnails for text, audio, video
        ('thumbnails', 'thumbs',
         'stashing thumbnails for non-image files (text, audio, video)...',
         'finished stashing thumbnails'),
        # media.json files
        ('media_json', 'mediajson',
         'stashing media.json files for collection...',
         'finished stashing media.json'),
    )
    reports = []
    for method_name, prefix, announcement, done_message in stages:
        print(announcement)
        report = getattr(stash, method_name)()
        logger.info(done_message)
        print(done_message)
        report_file = "{}-{}.json".format(prefix, filename)
        s3_report(report_file, report)
        print("report:\t{}\n".format(report_file))
        reports.append(report)

    # print some information about how it went
    # Counting happens only after every stage has run, in stage order.
    images_stashed, files_stashed, thumbs_stashed, mediajson_stashed = [
        count_stashed(report) for report in reports]

    # TODO: make sure this is in rqworker log
    summary = ''.join((
        "SUMMARY:\n",
        "objects processed:              {}\n".format(len(stash.objects)),
        "replaced existing files on s3:  {}\n".format(stash.replace),
        "images stashed:                 {}\n".format(images_stashed),
        "files stashed:                  {}\n".format(files_stashed),
        "thumbnails stashed:             {}\n".format(thumbs_stashed),
        "media.json files stashed:       {}\n".format(mediajson_stashed),
    ))
    print(summary)
    publish_to_harvesting('Deep Harvest for {} done'.format(path), summary)
Ejemplo n.º 7
0
def main(registry_id, pynuxrc="~/.pynuxrc", replace=True, loglevel=_loglevel_):
    '''Run every stash stage for one registry collection.

    The nuxeo path is looked up with s3stash.s3tools.get_nuxeo_path.
    Each stage (images, files, thumbnails, media.json) is announced on
    stdout, run, and its report handed to s3_report under the name
    '<stage>-<registry_id>.json'.  Finally a summary with the number of
    report entries flagged 'stashed' per stage is printed and sent
    through publish_to_harvesting.

    registry_id -- used for the path lookup, the report file names and
                   the subject of the published message
    pynuxrc     -- handed to Stash unchanged
    replace     -- handed to Stash unchanged; the summary echoes
                   stash.replace
    loglevel    -- name of a logging level ('INFO', 'debug', ...); the
                   lookup is case-insensitive

    Raises ValueError if loglevel does not name an integer attribute of
    the logging module.  Calls sys.exit() when no nuxeo path is found.
    '''
    # set up logging
    # Level constants are upper-case attributes of the logging module, so
    # normalise the case before the lookup ('info' -> logging.INFO).
    numeric_level = getattr(logging, loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % loglevel)
    # log to stdout/err to capture in parent process log
    # TODO: save log to S3
    logging.basicConfig(
        level=numeric_level,
        format='%(asctime)s (%(name)s) [%(levelname)s]: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p',
        stream=sys.stderr)
    logger = logging.getLogger(__name__)

    # get nuxeo path
    nxpath = s3stash.s3tools.get_nuxeo_path(registry_id)
    if nxpath is None:
        print("No record found for registry_id: {}".format(registry_id))
        # NOTE(review): this exits with status 0 although nothing was
        # stashed -- consider a non-zero status once callers are checked.
        sys.exit()
    info = 'nuxeo_path: {}'.format(nxpath)
    logger.info(info)
    # info, a space, then a blank line -- what `print info, '\n'` emits
    print(info + ' \n')

    stash = Stash(nxpath, pynuxrc, replace)

    def count_stashed(report):
        '''Number of report entries whose 'stashed' flag is truthy.'''
        return sum(
            1 for _key, entry in report.iteritems() if entry['stashed'])

    # (Stash method, report file prefix, announcement, completion message)
    stages = (
        # images for use with iiif server
        ('images', 'images',
         'stashing images...',
         'finished stashing images'),
        # text, audio, video
        ('files', 'files',
         'stashing non-image files (text, audio, video)...',
         'finished stashing files'),
        # thumbnails for text, audio, video
        ('thumbnails', 'thumbs',
         'stashing thumbnails for non-image files (text, audio, video)...',
         'finished stashing thumbnails'),
        # media.json files
        ('media_json', 'mediajson',
         'stashing media.json files for collection...',
         'finished stashing media.json'),
    )
    reports = []
    for method_name, prefix, announcement, done_message in stages:
        print(announcement)
        report = getattr(stash, method_name)()
        logger.info(done_message)
        print(done_message)
        report_file = "{}-{}.json".format(prefix, registry_id)
        s3_report(report_file, report)
        print("report:\t{}\n".format(report_file))
        reports.append(report)

    # print some information about how it went
    # Counting happens only after every stage has run, in stage order.
    images_stashed, files_stashed, thumbs_stashed, mediajson_stashed = [
        count_stashed(report) for report in reports]

    # TODO: make sure this is in rqworker log
    summary = ''.join((
        "SUMMARY:\n",
        "objects processed:              {}\n".format(len(stash.objects)),
        "replaced existing files on s3:  {}\n".format(stash.replace),
        "images stashed:                 {}\n".format(images_stashed),
        "files stashed:                  {}\n".format(files_stashed),
        "thumbnails stashed:             {}\n".format(thumbs_stashed),
        "media.json files stashed:       {}\n".format(mediajson_stashed),
    ))
    print(summary)
    publish_to_harvesting('Deep Harvest for {} done'.format(registry_id),
                          summary)
def main(registry_id, pynuxrc="~/.pynuxrc", replace=True, loglevel=_loglevel_):
    '''Run every stash stage for one registry collection.

    The nuxeo path is looked up with s3stash.s3tools.get_nuxeo_path.
    Each stage (images, files, thumbnails, media.json) is announced on
    stdout, run, and its report handed to s3_report under the name
    '<stage>-<registry_id>.json'.  Finally a summary with the number of
    report entries flagged 'stashed' per stage is printed and sent
    through publish_to_harvesting.

    registry_id -- used for the path lookup, the report file names and
                   the subject of the published message
    pynuxrc     -- handed to Stash unchanged
    replace     -- handed to Stash unchanged; the summary echoes
                   stash.replace
    loglevel    -- name of a logging level ('INFO', 'debug', ...); the
                   lookup is case-insensitive

    Raises ValueError if loglevel does not name an integer attribute of
    the logging module.  Calls sys.exit() when no nuxeo path is found.
    '''
    # set up logging
    # Level constants are upper-case attributes of the logging module, so
    # normalise the case before the lookup ('info' -> logging.INFO).
    numeric_level = getattr(logging, loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % loglevel)
    # log to stdout/err to capture in parent process log
    # TODO: save log to S3
    logging.basicConfig(
        level=numeric_level,
        format='%(asctime)s (%(name)s) [%(levelname)s]: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p',
        stream=sys.stderr)
    logger = logging.getLogger(__name__)

    # get nuxeo path
    nxpath = s3stash.s3tools.get_nuxeo_path(registry_id)
    if nxpath is None:
        print("No record found for registry_id: {}".format(registry_id))
        # NOTE(review): this exits with status 0 although nothing was
        # stashed -- consider a non-zero status once callers are checked.
        sys.exit()
    info = 'nuxeo_path: {}'.format(nxpath)
    logger.info(info)
    # info, a space, then a blank line -- what `print info, '\n'` emits
    print(info + ' \n')

    stash = Stash(nxpath, pynuxrc, replace)

    def count_stashed(report):
        '''Number of report entries whose 'stashed' flag is truthy.'''
        return sum(
            1 for _key, entry in report.iteritems() if entry['stashed'])

    # (Stash method, report file prefix, announcement, completion message)
    stages = (
        # images for use with iiif server
        ('images', 'images',
         'stashing images...',
         'finished stashing images'),
        # text, audio, video
        ('files', 'files',
         'stashing non-image files (text, audio, video)...',
         'finished stashing files'),
        # thumbnails for text, audio, video
        ('thumbnails', 'thumbs',
         'stashing thumbnails for non-image files (text, audio, video)...',
         'finished stashing thumbnails'),
        # media.json files
        ('media_json', 'mediajson',
         'stashing media.json files for collection...',
         'finished stashing media.json'),
    )
    reports = []
    for method_name, prefix, announcement, done_message in stages:
        print(announcement)
        report = getattr(stash, method_name)()
        logger.info(done_message)
        print(done_message)
        report_file = "{}-{}.json".format(prefix, registry_id)
        s3_report(report_file, report)
        print("report:\t{}\n".format(report_file))
        reports.append(report)

    # print some information about how it went
    # Counting happens only after every stage has run, in stage order.
    images_stashed, files_stashed, thumbs_stashed, mediajson_stashed = [
        count_stashed(report) for report in reports]

    # TODO: make sure this is in rqworker log
    summary = ''.join((
        "SUMMARY:\n",
        "objects processed:              {}\n".format(len(stash.objects)),
        "replaced existing files on s3:  {}\n".format(stash.replace),
        "images stashed:                 {}\n".format(images_stashed),
        "files stashed:                  {}\n".format(files_stashed),
        "thumbnails stashed:             {}\n".format(thumbs_stashed),
        "media.json files stashed:       {}\n".format(mediajson_stashed),
    ))
    print(summary)
    publish_to_harvesting('Deep Harvest for {} done'.format(registry_id),
                          summary)
def main(path, pynuxrc="~/.pynuxrc", replace=True):
    '''Run every stash stage for one path, saving a report per stage.

    Logging goes to logs/<basename of path>.log at INFO level.  Each
    stage (images, files, thumbnails, media.json) is announced on stdout,
    run, and its report handed to s3_report under the name
    '<stage>-<basename>.json'.  Finally a summary with the number of
    report entries flagged 'stashed' per stage is printed and sent
    through publish_to_harvesting.

    path    -- handed to Stash; its last component names the log file and
               the report files
    pynuxrc -- handed to Stash unchanged
    replace -- handed to Stash unchanged; the summary echoes stash.replace
    '''
    # logging
    # FIXME would like to name log with nuxeo UID
    # Drop trailing slashes first: basename('a/b/') is '' and would yield
    # 'logs/.log' and report names such as 'images-.json'.
    filename = os.path.basename(path.rstrip('/'))
    logfile = "logs/{}.log".format(filename)
    print("LOG:\t{}".format(logfile))
    # basicConfig(filename=...) opens the log file right away and fails
    # when its directory is missing, so create the directory on demand.
    log_dir = os.path.dirname(logfile)
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    logging.basicConfig(
        filename=logfile,
        level=logging.INFO,
        format='%(asctime)s (%(name)s) [%(levelname)s]: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p')
    logger = logging.getLogger(__name__)

    stash = Stash(path, pynuxrc, replace)

    def count_stashed(report):
        '''Number of report entries whose 'stashed' flag is truthy.'''
        return sum(
            1 for _key, entry in report.iteritems() if entry['stashed'])

    # (Stash method, report file prefix, announcement, completion message)
    stages = (
        # images for use with iiif server
        ('images', 'images',
         'stashing images...',
         'finished stashing images'),
        # text, audio, video
        ('files', 'files',
         'stashing non-image files (text, audio, video)...',
         'finished stashing files'),
        # thumbnails for text, audio, video
        ('thumbnails', 'thumbs',
         'stashing thumbnails for non-image files (text, audio, video)...',
         'finished stashing thumbnails'),
        # media.json files
        ('media_json', 'mediajson',
         'stashing media.json files for collection...',
         'finished stashing media.json'),
    )
    reports = []
    for method_name, prefix, announcement, done_message in stages:
        print(announcement)
        report = getattr(stash, method_name)()
        logger.info(done_message)
        print(done_message)
        report_file = "{}-{}.json".format(prefix, filename)
        s3_report(report_file, report)
        print("report:\t{}\n".format(report_file))
        reports.append(report)

    # print some information about how it went
    # Counting happens only after every stage has run, in stage order.
    images_stashed, files_stashed, thumbs_stashed, mediajson_stashed = [
        count_stashed(report) for report in reports]

    # TODO: make sure this is in rqworker log
    summary = ''.join((
        "SUMMARY:\n",
        "objects processed:              {}\n".format(len(stash.objects)),
        "replaced existing files on s3:  {}\n".format(stash.replace),
        "images stashed:                 {}\n".format(images_stashed),
        "files stashed:                  {}\n".format(files_stashed),
        "thumbnails stashed:             {}\n".format(thumbs_stashed),
        "media.json files stashed:       {}\n".format(mediajson_stashed),
    ))
    print(summary)
    publish_to_harvesting('Deep Harvest for {} done'.format(path),
                          summary)