def main(argv=None):
    """Report blobs under /asset-library/UCM that share a digest.

    Fix: Python-2 ``print`` statement converted to the ``print()``
    function; other entry points in this file use Python-3-only
    features, so the py2 syntax was a syntax error.
    """
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    documents = nx.nxql(
        'select * from Document where ecm:path startswith "/asset-library/UCM"'
    )
    duplicates = defaultdict(list)
    row = 0
    for document in documents:
        for blob in blob_from_doc(document):
            if blob:
                # remember every (uid, path#xpath) seen for this digest
                duplicates[blob['digest']].append(
                    (blob['uid'],
                     u'{0}#{1}'.format(blob['path'],
                                       blob['xpath']).encode('utf-8')))
                if row % 25000 == 0:
                    # progress indicator for long-running scans
                    print('{0} blobs checked'.format(row))
                row = row + 1
    # keep only digests that occur more than once
    # http://stackoverflow.com/a/8425075
    duplicates = {k: v for k, v in duplicates.items() if len(v) > 1}
    pp(duplicates)
    print(len(duplicates))
def main(argv=None):
    """Download the main file of a Nuxeo document to the current directory.

    Fix: Python-2 ``print`` statements converted to ``print()`` calls
    (syntax errors under Python 3, which this file otherwise requires).
    """
    parser = argparse.ArgumentParser(
        description='check for existence of jp2 file on s3 for given nuxeo path')
    parser.add_argument('path', help="Nuxeo document path")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nuxeo_path = argv.path
    print("\nnuxeo_path:", nuxeo_path)
    # get the Nuxeo ID
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    nuxeo_id = nx.get_uid(nuxeo_path)
    print("nuxeo_id:", nuxeo_id)
    download_url = get_download_url(nuxeo_id, nuxeo_path, nx)
    print(download_url, '\n')
    # save next to the working directory under the document's basename
    filename = os.path.basename(nuxeo_path)
    filepath = os.path.join(os.getcwd(), filename)
    download_nuxeo_file(download_url, filepath, nx)
    print("\nDone\n")
def main(argv=None):
    """Check S3 for a file corresponding to each child of a Nuxeo path.

    Fix: Python-2 ``print`` statements converted to ``print()`` calls.
    """
    parser = argparse.ArgumentParser(
        description='check for existence of jp2 file on s3 for given nuxeo path')
    parser.add_argument('path', help="Nuxeo document path")
    parser.add_argument('bucket', help="S3 bucket name")
    parser.add_argument('--pynuxrc', default='~/.pynux-prod',
                        help="rc file for use by pynux")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nuxeo_path = argv.path
    bucketpath = argv.bucket
    nx = utils.Nuxeo(rcfile=argv.pynuxrc, loglevel=argv.loglevel.upper())
    # just for simple objects for now
    objects = nx.children(argv.path)
    print("\nFound objects at {}.\nChecking S3 bucket {} for existence of corresponding files.\nThis could take a while...".format(nuxeo_path, bucketpath))
    count = 0
    for obj in objects:
        nuxeo_id = nx.get_uid(obj['path'])
        check_object_on_s3(nuxeo_id, bucketpath)
        count += 1
    print("Done. Checked {} objects".format(count))
def main(argv=None):
    """Entry point stub for converting an object to jp2.

    Fix: Python-2 ``print`` statement converted to a ``print()`` call.
    """
    parser = argparse.ArgumentParser(description='convert an object to jp2')
    parser.add_argument('path', help="Nuxeo document path")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    print(argv.path)
    # NOTE(review): nx is created but unused here — presumably the
    # conversion logic was to follow; left in place for compatibility
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
def main(argv=None):
    """Print the Nuxeo platform importer status.

    Fix: Python-2 ``print`` statement converted to a ``print()`` call.
    """
    parser = argparse.ArgumentParser(
        description='nuxeo platform importer status')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    print(nx.call_file_importer_api('status'))
def main(argv):
    """Load metadata from a tab-delimited spreadsheet into Nuxeo.

    Fixes: input validation used ``assert`` (silently stripped under
    ``python -O``) — replaced with a direct check; removed the unused
    local ``nuxeo_limit``; fixed the "reported and error" typo in the
    error message.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--datafile",
        type=str,
        required=True,
        help="tab-delimited spreadsheet input file -- required")
    parser.add_argument('-d', '--dry-run', action='store_true',
                        help='dry run')
    parser.add_argument('--blankout', action='store_true',
                        help='blank out all fields not set in sheet')
    utils.get_common_options(parser)
    args = parser.parse_args()
    # validate without assert so the check survives python -O
    if not os.path.isfile(args.datafile):
        print("Not a file: ", args.datafile)
        sys.exit(2)
    csv_data_file = args.datafile
    print(csv_data_file)
    print(args.rcfile)
    print(args.loglevel)
    nx = utils.Nuxeo(rcfile=args.rcfile, loglevel=args.loglevel.upper())
    # get an instance of the Csv2Dict class which must be initialized
    # with the name of an input data (csv) file
    csv2dict = Csv2Dict(csv_data_file, blankout=args.blankout)
    if csv2dict.status != 0:
        print('The Csv2Dict constructor reported an error (%d).' %
              csv2dict.status)
        sys.exit(csv2dict.status)
    process_rows(csv2dict)
    for n in range(csv2dict.get_meta_dict_length()):
        print("Loading payload %d" % n)
        payload = csv2dict.get_meta_dict(n)
        print(payload)
        print(payload['path'])
        if not args.dry_run:
            uid = nx.get_uid(payload['path'])
            print("Returned UID: %d) %s." % (n, uid))
            nx.update_nuxeo_properties(payload, path=payload['path'])
def main(argv=None):
    """Print a summary line for every document in the repository."""
    parser = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    # todo; add these defaults as parameters as well as env
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    nx.print_document_summary(nx.all())
def main(argv=None):
    """Write qa.xlsx listing uid, local identifier, filename, path and
    title for the children of a Nuxeo document path."""
    parser = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    parser.add_argument('path', nargs=1, help="nuxeo document path")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    base_path = argv.path[0]
    documents = nx.children(base_path)

    # open the workbook
    workbook = xlsxwriter.Workbook('qa.xlsx')
    header_format = workbook.add_format({'bold': True, })
    report = workbook.add_worksheet()

    # column widths
    report.set_column(0, 0, 10, )
    report.set_column(1, 2, 40, )
    report.set_column(3, 4, 80, )

    # header row
    for col, label in enumerate(['nuxeo-uid',
                                 'ucldc_schema:localidentifier',
                                 'filename',
                                 'nuxeo-path',
                                 'title']):
        report.write(0, col, label, header_format)

    # document specified on command line
    root_doc = nx.get_metadata(path=base_path)
    report.write(1, 0, root_doc['uid'])
    report.write(1, 3, base_path)

    # one row per child document, starting below the root row
    for row, document in enumerate(documents, start=2):
        props = document['properties']
        report.write(row, 0, document['uid'])
        report.write(row, 1, props['ucldc_schema:localidentifier'][0])
        if 'file:filename' in props:
            report.write(row, 2, props['file:filename'])
        report.write(row, 3, document['path'].replace(base_path, '', 1))
        report.write(row, 4, document['title'])

    workbook.close()
def main(argv=None):
    """List documents at/under a Nuxeo path with several output modes."""
    parser = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    parser.add_argument('path', nargs=1, help='nuxeo document path',
                        type=utf8_arg)
    parser.add_argument(
        '--outdir',
        help="directory to hold application/json+nxentity .json files",
        type=utf8_arg)
    recurse = parser.add_mutually_exclusive_group(required=False)
    recurse.add_argument('--recursive-folders',
                         help='recursively list project folders/Organzation',
                         action='store_true')
    recurse.add_argument('--recursive-objects',
                         help='recursively list objects',
                         action='store_true')
    display = parser.add_mutually_exclusive_group(required=False)
    display.add_argument('--show-only-uid', action='store_true')
    display.add_argument('--show-only-path', action='store_true')
    display.add_argument('--show-custom-function')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    root = argv.path[0]
    if argv.recursive_folders:
        documents = nx.recursive_project_folders(root)
    elif argv.recursive_objects:
        documents = nx.recursive_objects(root)
    else:
        # the document itself, followed by its direct children
        documents = itertools.chain(
            nx.nxql(u'select * from Document where ecm:path="{}"'.format(root)),
            nx.children(root))

    if argv.outdir:
        # Expand user- and relative-paths
        destination = os.path.abspath(os.path.expanduser(argv.outdir))
        nx.copy_metadata_to_local(documents, destination)
    elif argv.show_only_path is True:
        for document in documents:
            print(document['path'])
    elif argv.show_only_uid is True:
        for document in documents:
            print(document['uid'])
    elif argv.show_custom_function:
        mapper = importlib.import_module(argv.show_custom_function)
        mapper.nuxeo_mapper(documents, nx)
    else:
        nx.print_document_summary(documents)
def main(argv=None):
    """Show the Nuxeo platform importer log, or activate it."""
    parser = argparse.ArgumentParser(
        description='nuxeo platform importer log/logActivate')
    parser.add_argument('--activate', action='store_true')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    if not argv.activate:
        nx.import_log()
    else:
        nx.import_log_activate()
def main(argv=None):
    """Print the JSON metadata record for a Nuxeo document path.

    Fix: the metadata was fetched but never output, contradicting the
    declared purpose ('Print nuxeo json metadata for object.'); emit it
    as pretty-printed JSON.
    """
    parser = argparse.ArgumentParser(
        description='Print nuxeo json metadata for object.')
    parser.add_argument('path', help="Nuxeo document path")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    path = argv.path
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    uid = nx.get_uid(path)
    metadata = nx.get_metadata(uid=uid)
    print(json.dumps(metadata, indent=4, sort_keys=True))
def main(argv=None):
    """List documents at/under a Nuxeo path with several output modes.

    Fix: ``== True`` comparisons replaced with direct truth tests
    (PEP 8; argparse store_true flags are already booleans).
    """
    parser = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    parser.add_argument('path', nargs=1, help='nuxeo document path',
                        type=utf8_arg)
    parser.add_argument(
        '--outdir',
        help="directory to hold application/json+nxentity .json files",
        type=utf8_arg)
    rstyle = parser.add_mutually_exclusive_group(required=False)
    rstyle.add_argument('--recursive-folders',
                        help='recursively list project folders/Organzation',
                        action='store_true')
    rstyle.add_argument('--recursive-objects',
                        help='recursively list objects',
                        action='store_true')
    show = parser.add_mutually_exclusive_group(required=False)
    show.add_argument('--show-only-uid', action='store_true')
    show.add_argument('--show-only-path', action='store_true')
    show.add_argument('--show-custom-function')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    if argv.recursive_folders:
        documents = nx.recursive_project_folders(argv.path[0])
    elif argv.recursive_objects:
        documents = nx.recursive_objects(argv.path[0])
    else:
        # the document itself, then its direct children
        documents = itertools.chain(
            nx.nxql(u'select * from Document where ecm:path="{}"'.format(
                argv.path[0])),
            nx.children(argv.path[0]))
    if argv.outdir:
        # Expand user- and relative-paths
        outdir = os.path.abspath(os.path.expanduser(argv.outdir))
        nx.copy_metadata_to_local(documents, outdir)
    elif argv.show_only_path:
        for document in documents:
            print(document['path'])
    elif argv.show_only_uid:
        for document in documents:
            print(document['uid'])
    elif argv.show_custom_function:
        mapper = importlib.import_module(argv.show_custom_function)
        mapper.nuxeo_mapper(documents, nx)
    else:
        nx.print_document_summary(documents)
def main(argv=None):
    """Print the Nuxeo path for a given uid.

    Fix: Python-2 ``print`` statement converted to a ``print()`` call.
    """
    parser = argparse.ArgumentParser(
        description='Print nuxeo path for given uid.')
    parser.add_argument('uid', help="Nuxeo uid")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    uid = argv.uid
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    metadata = nx.get_metadata(uid=uid)
    path = metadata['path']
    print(path, uid)
def main(argv=None):
    """Print items in a collection whose media.json file is missing on S3.

    Fixes: Python-2 ``print`` statements; ``bucket`` was used after the
    ``except`` branch without being bound (NameError) — now exits on a
    failed bucket lookup; removed a stray trailing ``'''``.
    """
    parser = argparse.ArgumentParser(
        description='print info for items in collection where media.json '
                    'file is missing.')
    parser.add_argument('path', help="Nuxeo document path for collection")
    parser.add_argument('bucket', help="S3 bucket name")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nuxeo_path = argv.path
    bucketpath = argv.bucket
    print("collection nuxeo_path:", nuxeo_path)
    # get the Nuxeo ID for the collection
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    nuxeo_id = nx.get_uid(nuxeo_path)
    print("collection nuxeo_id:", nuxeo_id)
    # connect to S3
    conn = connect_s3(calling_format=OrdinaryCallingFormat())
    bucketpath = bucketpath.strip("/")
    bucketbase = bucketpath.split("/")[0]
    print("bucketpath:", bucketpath)
    print("bucketbase:", bucketbase)
    try:
        bucket = conn.get_bucket(bucketbase)
    except boto.exception.S3ResponseError:
        print("bucket doesn't exist on S3:", bucketbase)
        # without a bucket the checks below cannot run
        sys.exit(1)
    items = nx.children(nuxeo_path)
    for item in items:
        obj_key = "{0}-media.json".format(item['uid'])
        s3_url = "s3://{0}/{1}".format(bucketpath, obj_key)
        parts = urlparse.urlsplit(s3_url)
        if not (bucket.get_key(parts.path)):
            print("object doesn't exist on S3:", parts.path)
def main(argv=None):
    """Fetch and display the media.json file on S3 for a Nuxeo path.

    Fixes: Python-2 ``print`` statements; ``bucket`` was used after the
    ``except`` branch without being bound (NameError) — now exits on a
    failed bucket lookup.
    """
    parser = argparse.ArgumentParser(
        description='get media.json file for given nuxeo path')
    parser.add_argument('path', help="Nuxeo document path")
    parser.add_argument('bucket', help="S3 bucket name")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nuxeo_path = argv.path
    bucketpath = argv.bucket
    print("nuxeo_path:", nuxeo_path)
    # get the Nuxeo ID
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    nuxeo_id = nx.get_uid(nuxeo_path)
    print("nuxeo_id:", nuxeo_id)
    # see if a media.json file exists on S3 for this object
    conn = connect_s3(calling_format=OrdinaryCallingFormat())
    bucketpath = bucketpath.strip("/")
    bucketbase = bucketpath.split("/")[0]
    obj_key = "{0}-media.json".format(nuxeo_id)
    s3_url = "s3://{0}/{1}".format(bucketpath, obj_key)
    print("s3_url:", s3_url)
    parts = urlparse.urlsplit(s3_url)
    print("bucketpath:", bucketpath)
    print("bucketbase:", bucketbase)
    try:
        bucket = conn.get_bucket(bucketbase)
    except boto.exception.S3ResponseError:
        print("bucket doesn't exist on S3:", bucketbase)
        # without a bucket the lookup below cannot run
        sys.exit(1)
    if not (bucket.get_key(parts.path)):
        print("object doesn't exist on S3:", parts.path)
    else:
        print("yup the object exists!:", parts.path)
        k = Key(bucket)
        k.key = parts.path
        print("\nfile contents:")
        print(k.get_contents_as_string())
def main(argv=None):
    """Run the Nuxeo bulk file importer on a folder and report status.

    Fix: Python-2 ``print`` statements converted to ``print()`` calls.
    """
    parser = argparse.ArgumentParser(
        description='run import of a folder into nuxeo')
    utils.get_common_options(parser)
    required_flags = parser.add_argument_group(
        'there are four required arguments')
    required_flags.add_argument(
        '--leaf_type',
        help="nuxeo document type for imported leaf nodes",
        required=True)
    required_flags.add_argument(
        '--input_path', help="unix path to files", required=True)
    required_flags.add_argument(
        '--target_path',
        help="target document for import in nuxeo (parent folder where new folder will be created)",
        required=True)
    required_flags.add_argument(
        '--folderish_type',
        help="nuxeo document type for imported folder",
        required=True)
    parser.add_argument(
        '--no_wait',
        help="don't poll/wait for the job to finish",
        dest="no_wait",
        action="store_false")
    parser.add_argument(
        '--poll_interval',
        help="seconds to sleep for if waiting",
        dest="sleep",
        default=20,
        type=int)
    parser.add_argument(
        '--skip_root_folder_creation',
        help="don't create root folder on import",
        dest="skip_root_folder_creation",
        action="store_true")
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    print(nx.import_log_activate())
    # note: --no_wait stores False into no_wait, so wait=True by default
    print(nx.import_one_folder(
        argv.leaf_type,
        argv.input_path,
        argv.target_path,
        argv.folderish_type,
        wait=argv.no_wait,
        sleep=argv.sleep,
        skip_root_folder_creation=argv.skip_root_folder_creation))
    print(nx.call_file_importer_api('status'))
    print(nx.import_log())
def main(argv=None):
    """Summarize the children of a Nuxeo path, or dump them to --outdir."""
    parser = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    parser.add_argument('path', nargs=1, help="nuxeo document path")
    parser.add_argument(
        '--outdir',
        help="directory to hold application/json+nxentity .json files")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    children = nx.children(argv.path[0])
    if not argv.outdir:
        nx.print_document_summary(children)
    else:
        # Expand user- and relative-paths
        destination = os.path.abspath(os.path.expanduser(argv.outdir))
        nx.copy_metadata_to_local(children, destination)
def main(argv=None):
    """Summarize every document in the repository, or dump all metadata
    to --outdir."""
    parser = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    parser.add_argument(
        '--outdir',
        help="directory to hold application/json+nxentity .json files",
        type=utf8_arg)
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    # todo; add these defaults as parameters as well as env
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    everything = nx.all()
    if not argv.outdir:
        nx.print_document_summary(everything)
    else:
        # Expand user- and relative-paths
        destination = os.path.abspath(os.path.expanduser(argv.outdir))
        nx.copy_metadata_to_local(everything, destination)
def main(argv=None):
    """Import metadata into Nuxeo for the LIJA2 (mets) collection.

    Fixes: Python-2 ``print`` statements; the loop variable ``long``
    shadowed a builtin; ``[dirs for root, dirs, files in os.walk(d)][0]``
    walked the entire tree just to read the first directory listing —
    ``next(os.walk(d))[1]`` yields the same list without the full walk.
    """
    parser = argparse.ArgumentParser(
        description='Import metadata into Nuxeo for LIJA2 (mets) collection.')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    # immediate subdirectories of metadata_dir are NAANs; each NAAN
    # directory contains one directory per ARK
    naans = next(os.walk(metadata_dir))[1]
    for naan in naans:
        naan_dir = os.path.join(metadata_dir, naan)
        arks = next(os.walk(naan_dir))[1]
        for ark in arks:
            filepath = os.path.join(metadata_dir, naan, ark,
                                    ark + '.mets.xml')
            process_object(filepath, nx)
    print("\n\nPath components over Nuxeo length limit (" +
          str(nuxeo_limit) + "):")
    print("TOTAL:", len(toolong))
    for entry in toolong:
        print(entry)
def main(argv=None):
    """Dump or summarize metadata for every document in the repository."""
    parser = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    parser.add_argument(
        '--outdir',
        help="directory to hold application/json+nxentity .json files",
        type=utf8_arg)
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    # todo; add these defaults as parameters as well as env
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    docs = nx.all()
    if argv.outdir:
        # Expand user- and relative-paths
        target_dir = os.path.abspath(os.path.expanduser(argv.outdir))
        nx.copy_metadata_to_local(docs, target_dir)
    else:
        nx.print_document_summary(docs)
def main(argv=None):
    """Run an NXQL query; summarize the results or dump them to --outdir."""
    parser = argparse.ArgumentParser(description='nxql via REST API')
    parser.add_argument('nxql', nargs=1, help="nxql query", type=utf8_arg)
    parser.add_argument(
        '--outdir',
        help="directory to hold application/json+nxentity .json files",
        type=utf8_arg)
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    results = nx.nxql(argv.nxql[0])
    if not argv.outdir:
        nx.print_document_summary(results)
    else:
        # Expand user- and relative-paths
        destination = os.path.abspath(os.path.expanduser(argv.outdir))
        nx.copy_metadata_to_local(results, destination)
def main(argv=None):
    """Report blobs under /asset-library/UCM that share a digest
    (duplicate of the first entry point in this file).

    Fix: Python-2 ``print`` statement converted to a ``print()`` call.
    """
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    documents = nx.nxql(
        'select * from Document where ecm:path startswith "/asset-library/UCM"')
    duplicates = defaultdict(list)
    row = 0
    for document in documents:
        for blob in blob_from_doc(document):
            if blob:
                duplicates[blob['digest']].append(
                    (blob['uid'],
                     u'{0}#{1}'.format(blob['path'],
                                       blob['xpath']).encode('utf-8')))
                if row % 25000 == 0:
                    # progress indicator
                    print('{0} blobs checked'.format(row))
                row = row + 1
    # keep only digests seen more than once
    # http://stackoverflow.com/a/8425075
    duplicates = {k: v for k, v in duplicates.items() if len(v) > 1}
    pp(duplicates)
    print(len(duplicates))
def main(argv=None):
    """Cross-check every Nuxeo blob's digest and size against S3.

    Fix: the size-mismatch message used positional field ``{3}`` twice,
    so the fifth argument (the Nuxeo size) was never displayed; the
    second occurrence is now ``{4}``.
    """
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    # memorize every S3 object name -> size up front
    file_dict = {}
    conn = s3.connect_to_region('us-west-2',
                                calling_format=OrdinaryCallingFormat())
    bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon')
    for count, key in enumerate(bucket.list()):
        file_dict[key.name] = key.size
        if count % 50000 == 0:
            print('{0} s3 files memorized'.format(count))
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    documents = nx.nxql('select * from Document')
    row = 0
    for document in documents:
        for blob in blob_from_doc(document):
            if blob:
                s3_size = file_dict.get(blob['digest'], None)
                if not s3_size:
                    print('{0} from {1} {2} not found in S3'
                          .format(blob['digest'], blob['path'],
                                  blob['xpath']))
                if file_dict.get(blob['digest'], 0) != int(blob['length']):
                    # {4} (nuxeo size) was previously a duplicated {3}
                    print('{0} from {1} {2} s3 size {3} does not match '
                          'nuxeo size {4}'
                          .format(blob['digest'], blob['path'],
                                  blob['xpath'], s3_size, blob['length']))
                if row % 25000 == 0:
                    print('{0} nuxeo blobs checked'.format(row))
                row = row + 1
def main(argv=None):
    """Update one Nuxeo record from an application/json+nxentity file.

    Fix: the JSON input file was opened without ever being closed;
    reading now happens inside a ``with`` block.
    """
    parser = argparse.ArgumentParser(
        description='nuxeo metadata via REST API, one record')
    parser.add_argument('file', nargs=1, help="application/json+nxentity")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--uid', help="update specific nuxeo uid")
    group.add_argument(
        '--path', help="update specific nuxeo path", type=utf8_arg)
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    # todo; add these defaults as parameters as well as env
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    pp(argv.file[0])
    jfile = argv.file[0]
    uid = argv.uid
    path = argv.path
    with open(jfile) as json_data:
        data = json.load(json_data)
    ret = {}
    if uid:
        # use uid supplied at command line
        ret = nx.update_nuxeo_properties(data, uid=uid)
    elif path:
        # use path supplied at command line
        ret = nx.update_nuxeo_properties(data, path=path)
    else:
        # if no uid nor path was specified on the command line, then
        # prefer "path": to "uid": when importing files because the file
        # may have come from another machine where the uuids are different
        uid = nx.get_uid(data.get('path')) or data.get('uid')
        ret = nx.update_nuxeo_properties(data, uid=uid)
    if not ret:
        print("no uid found, specify --uid or --path")
        exit(1)
    pp(ret)
def main(argv=None):
    """Update one Nuxeo record from an application/json+nxentity file
    (duplicate of the previous entry point).

    Fixes: unclosed JSON input file (now a ``with`` block) and a
    Python-2 ``print`` statement.
    """
    parser = argparse.ArgumentParser(
        description='nuxeo metadata via REST API, one record')
    parser.add_argument('file', nargs=1, help="application/json+nxentity")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--uid', help="update specific nuxeo uid")
    group.add_argument('--path', help="update specific nuxeo path",
                       type=utf8_arg)
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    # todo; add these defaults as parameters as well as env
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    pp(argv.file[0])
    jfile = argv.file[0]
    uid = argv.uid
    path = argv.path
    with open(jfile) as json_data:
        data = json.load(json_data)
    ret = {}
    if uid:
        # use uid supplied at command line
        ret = nx.update_nuxeo_properties(data, uid=uid)
    elif path:
        # use path supplied at command line
        ret = nx.update_nuxeo_properties(data, path=path)
    else:
        # if no uid nor path was specified on the command line, then
        # prefer "path": to "uid": when importing files because the file
        # may have come from another machine where the uuids are different
        uid = nx.get_uid(data.get('path')) or data.get('uid')
        ret = nx.update_nuxeo_properties(data, uid=uid)
    if not ret:
        print("no uid found, specify --uid or --path")
        exit(1)
    pp(ret)
def main(argv=None):
    """Cross-check Nuxeo blob digests/sizes against S3 (duplicate of an
    earlier entry point).

    Fix: the mismatch message reused field ``{3}``; the second
    occurrence should be ``{4}`` so the Nuxeo size is shown.
    """
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    file_dict = {}
    conn = s3.connect_to_region('us-west-2',
                                calling_format=OrdinaryCallingFormat())
    bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon')
    for count, key in enumerate(bucket.list()):
        file_dict[key.name] = key.size
        if count % 50000 == 0:
            print('{0} s3 files memorized'.format(count))
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    documents = nx.nxql('select * from Document')
    row = 0
    for document in documents:
        for blob in blob_from_doc(document):
            if blob:
                s3_size = file_dict.get(blob['digest'], None)
                if not s3_size:
                    print('{0} from {1} {2} not found in S3'.format(
                        blob['digest'], blob['path'], blob['xpath']))
                if file_dict.get(blob['digest'], 0) != int(blob['length']):
                    # {4} (nuxeo size) was previously a duplicated {3}
                    print(
                        '{0} from {1} {2} s3 size {3} does not match nuxeo size {4}'
                        .format(blob['digest'], blob['path'], blob['xpath'],
                                s3_size, blob['length']))
                if row % 25000 == 0:
                    print('{0} nuxeo blobs checked'.format(row))
                row = row + 1
def main(argv=None):
    """Build per-campus extent stats plus a dated summary xlsx workbook."""
    parser = argparse.ArgumentParser(
        description="extent stats via Nuxeo REST API")
    parser.add_argument("outdir", nargs=1, )
    parser.add_argument("--no-s3-check", dest="s3_check",
                        action="store_false")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    os.makedirs(argv.outdir[0], exist_ok=True)

    # look up all the files in S3, so we can double check that all
    # the files exist as we loop through Nuxeo
    file_check = None
    s3_bytes = s3_count = 0
    if argv.s3_check:
        from boto import s3
        from boto.s3.connection import OrdinaryCallingFormat
        file_check = {}
        conn = s3.connect_to_region("us-west-2",
                                    calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket("data.nuxeo.cdlib.org.oregon")
        for count, key in enumerate(bucket.list()):
            file_check[key.name] = key.size
            if count % 50000 == 0:
                print("{0} s3 files memorized".format(count),
                      file=sys.stderr)
            s3_bytes = s3_bytes + key.size
        s3_count = len(file_check)

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    campuses = ["UCB", "UCD", "UCI", "UCLA", "UCM", "UCOP", "UCR",
                "UCSB", "UCSC", "UCSD", "UCSF"]

    summary_workbook = xlsxwriter.Workbook(
        os.path.join(argv.outdir[0], "{}-summary.xlsx".format(today)))
    # cell formats
    header_format = summary_workbook.add_format({"bold": True, })
    number_format = summary_workbook.add_format()
    number_format.set_num_format("#,##0")
    summary_worksheet = summary_workbook.add_worksheet("summary")

    # headers
    header_cells = [(1, "deduplicated files"), (2, "deduplicated bytes"),
                    (4, "total files"), (5, "total bytes")]
    if argv.s3_check:
        header_cells += [(7, "files on S3"), (8, "bytes on S3")]
    for col, text in header_cells:
        summary_worksheet.write(0, col, text, header_format)

    # widths
    for first_col, last_col, width in ((0, 1, 10), (2, 2, 25), (3, 4, 10),
                                       (5, 5, 25), (6, 7, 10), (8, 8, 25),
                                       (9, 9, 10)):
        summary_worksheet.set_column(first_col, last_col, width)

    true_count = dedup_total = total_count = running_total = 0
    row = 1
    for campus in campuses:
        (this_count, this_total, dedup_count,
         dedup_bytes) = forCampus(campus, file_check, argv.outdir[0], nx)
        # write out this row in the sheet
        summary_worksheet.write(row, 0, campus)
        summary_worksheet.write(row, 1, dedup_count, number_format)
        summary_worksheet.write(row, 2, dedup_bytes, number_format)
        summary_worksheet.write(row, 3, sizeof_fmt(dedup_bytes))
        summary_worksheet.write(row, 4, this_count, number_format)
        summary_worksheet.write(row, 5, this_total, number_format)
        summary_worksheet.write(row, 6, sizeof_fmt(this_total))
        # keep track of running totals
        total_count += this_count      # number of files
        running_total += this_total    # number of bytes
        true_count += dedup_count
        dedup_total += dedup_bytes     # number of bytes
        row += 1

    # write totals in the summary worksheet
    summary_worksheet.write(row, 0, "{}".format(today))
    summary_worksheet.write(row, 1, true_count, number_format)
    summary_worksheet.write(row, 2, dedup_total, number_format)
    summary_worksheet.write(row, 3, sizeof_fmt(dedup_total))
    summary_worksheet.write(row, 4, total_count, number_format)
    summary_worksheet.write(row, 5, running_total, number_format)
    summary_worksheet.write(row, 6, sizeof_fmt(running_total))
    if argv.s3_check:
        summary_worksheet.write(row, 7, s3_count, number_format)
        summary_worksheet.write(row, 8, s3_bytes, number_format)
        summary_worksheet.write(row, 9, sizeof_fmt(s3_bytes))
    summary_workbook.close()
def main(argv=None):
    """Print (and optionally stash) items in a collection whose
    media.json file is missing on S3.

    Fixes: ``bucket`` was used after the ``except`` branch without being
    bound (NameError) — now exits on a failed bucket lookup; the
    "stashed" message printed the literal ``item['path']`` instead of
    the actual path; removed a stray trailing ``'''``.
    """
    parser = argparse.ArgumentParser(
        description='print info for items in collection where media.json '
                    'file is missing.')
    parser.add_argument('path', help="Nuxeo document path for collection")
    parser.add_argument('bucket', help="S3 bucket name")
    parser.add_argument("--pynuxrc", default='~/.pynuxrc',
                        help="rc file for use by pynux")
    parser.add_argument(
        '--stash',
        action="store_true",
        help="create and stash missing media.json file")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()
    nuxeo_path = argv.path
    bucketpath = argv.bucket
    pynuxrc = argv.pynuxrc
    stash = argv.stash
    print("collection nuxeo_path:", nuxeo_path)
    # get the Nuxeo ID for the collection
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    nuxeo_id = nx.get_uid(nuxeo_path)
    print("collection nuxeo_id:", nuxeo_id)
    # connect to S3
    conn = connect_s3(calling_format=OrdinaryCallingFormat())
    bucketpath = bucketpath.strip("/")
    bucketbase = bucketpath.split("/")[0]
    print("bucketpath:", bucketpath)
    print("bucketbase:", bucketbase)
    try:
        bucket = conn.get_bucket(bucketbase)
    except boto.exception.S3ResponseError:
        print("bucket doesn't exist on S3:", bucketbase)
        # without a bucket the checks below cannot run
        sys.exit(1)
    items = nx.children(nuxeo_path)
    for item in items:
        obj_key = "{0}-media.json".format(item['uid'])
        s3_url = "s3://{0}/{1}".format(bucketpath, obj_key)
        parts = urlparse.urlsplit(s3_url)
        if item['type'] != 'Organization' and not (bucket.get_key(parts.path)):
            print("object doesn't exist on S3:", parts.path, item['path'])
            if stash:
                nxstash = NuxeoStashMediaJson(
                    item['path'], MEDIA_JSON_BUCKET, MEDIA_JSON_REGION,
                    pynuxrc, True)
                nxstash.nxstashref()
                # report which item was just stashed
                print("stashed for", item['path'])
def main(argv=None):
    """Extent stats for each campus under a root path, written to a
    dated summary xlsx workbook."""
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    parser.add_argument('path', nargs=1, help="root path")
    parser.add_argument('outdir', nargs=1, )
    parser.add_argument('--no-s3-check', dest='s3_check',
                        action='store_false')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    # look up all the files in S3 up front so existence can be double
    # checked while looping through Nuxeo
    file_check = None
    s3_bytes = s3_count = 0
    if argv.s3_check:
        from boto import s3
        from boto.s3.connection import OrdinaryCallingFormat
        file_check = {}
        conn = s3.connect_to_region('us-west-2',
                                    calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon')
        for count, key in enumerate(bucket.list()):
            file_check[key.name] = key.size
            if count % 50000 == 0:
                print('{0} s3 files memorized'.format(count),
                      file=sys.stderr)
            s3_bytes = s3_bytes + key.size
        s3_count = len(file_check)

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    campuses = nx.children(argv.path[0])

    summary_workbook = xlsxwriter.Workbook(
        os.path.join(argv.outdir[0], '{}-summary.xlsx'.format(today)))
    # cell formats
    header_format = summary_workbook.add_format({'bold': True, })
    number_format = summary_workbook.add_format()
    number_format.set_num_format('#,##0')
    summary_worksheet = summary_workbook.add_worksheet('summary')

    # headers
    header_cells = [(1, 'deduplicated files'), (2, 'deduplicated bytes'),
                    (4, 'total files'), (5, 'total bytes')]
    if argv.s3_check:
        header_cells += [(7, 'files on S3'), (8, 'bytes on S3')]
    for col, text in header_cells:
        summary_worksheet.write(0, col, text, header_format)

    # widths
    for first_col, last_col, width in ((0, 1, 10), (2, 2, 25), (3, 4, 10),
                                       (5, 5, 25), (6, 7, 10), (8, 8, 25),
                                       (9, 9, 10)):
        summary_worksheet.set_column(first_col, last_col, width)

    true_count = dedup_total = total_count = running_total = 0
    row = 1
    for campus in campuses:
        basename = os.path.basename(campus['path'])
        documents = nx.nxql(
            'select * from Document where ecm:path startswith"{0}"'.format(
                campus['path']))
        (this_count, this_total, dedup_count,
         dedup_bytes) = forCampus(documents, basename, file_check,
                                  argv.outdir[0])
        summary_worksheet.write(row, 0, basename)
        summary_worksheet.write(row, 1, dedup_count, number_format)
        summary_worksheet.write(row, 2, dedup_bytes, number_format)
        summary_worksheet.write(row, 3, sizeof_fmt(dedup_bytes))
        summary_worksheet.write(row, 4, this_count, number_format)
        summary_worksheet.write(row, 5, this_total, number_format)
        summary_worksheet.write(row, 6, sizeof_fmt(this_total))
        # running totals
        total_count += this_count      # number of files
        running_total += this_total    # number of bytes
        true_count += dedup_count
        dedup_total += dedup_bytes     # number of bytes
        row += 1

    # totals row
    summary_worksheet.write(row, 0, '{}'.format(today))
    summary_worksheet.write(row, 1, true_count, number_format)
    summary_worksheet.write(row, 2, dedup_total, number_format)
    summary_worksheet.write(row, 3, sizeof_fmt(dedup_total))
    summary_worksheet.write(row, 4, total_count, number_format)
    summary_worksheet.write(row, 5, running_total, number_format)
    summary_worksheet.write(row, 6, sizeof_fmt(running_total))
    if argv.s3_check:
        summary_worksheet.write(row, 7, s3_count, number_format)
        summary_worksheet.write(row, 8, s3_bytes, number_format)
        summary_worksheet.write(row, 9, sizeof_fmt(s3_bytes))
    summary_workbook.close()
def main(argv=None):
    """Find top-level objects in Nuxeo and sync their ARKs with EZID.

    For each parent-level document under the given path:
      --mint   mint and bind a new ARK when none is recorded in Nuxeo
      --create create an EZID record when Nuxeo has an ARK that EZID lacks
      --update update the EZID record when both Nuxeo and EZID have it
    Without those flags (and without --no-noop-report) it only reports
    the actions it would have taken.
    """
    # local import: only needed for the generator peek below
    from itertools import chain

    parser = argparse.ArgumentParser(
        description=
        'nxid finds top level objects in Nuxeo and syncs them up with EZID')
    parser.add_argument(
        'path', nargs=1, help='nuxeo path (folder or object)', type=utf8_arg)

    ezid_group = parser.add_argument_group('minting behaviour flags')
    ezid_group.add_argument(
        '--mint',
        '-m',
        action='store_true',
        help='when an ARK is missing, mint and bind new ARK in EZID')
    ezid_group.add_argument(
        '--create',
        '-c',
        action='store_true',
        help='when an ARK is found in Nuxeo but not EZID, create EZID')
    ezid_group.add_argument(
        '--update',
        '-u',
        action='store_true',
        help='when an ARK is found in Nuxeo and EZID, update EZID')
    ezid_group.add_argument(
        '--no-noop-report',
        action='store_true',
        help='override default behaviour of reporting on noops')
    ezid_group.add_argument(
        '--show-erc',
        action='store_true',
        help='show ANVL record that will be sent to EZID')

    conf_group = parser.add_argument_group('EZID configuration and metadata')
    conf_group.add_argument(
        '--ezid-username',
        help='username for EZID API (overrides rcfile)',
        type=utf8_arg)
    conf_group.add_argument(
        '--ezid-password',
        help='password for EZID API (overrides rc file)',
        type=utf8_arg)
    conf_group.add_argument(
        '--shoulder', help='shoulder (overrides rcfile)', type=utf8_arg)
    conf_group.add_argument(
        '--owner', help='set as _owner for EZID', type=utf8_arg)
    conf_group.add_argument(
        '--status',
        help='set as _status for EZID (public|reserved|unavailable)',
        type=utf8_arg)
    conf_group.add_argument(
        '--publisher', help='set as dc.publisher for EZID', type=utf8_arg)
    conf_group.add_argument(
        '--location', help='set location URL prefix for EZID', type=utf8_arg)

    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    # read config out of .pynuxrc file; command line flags override it
    username = argv.ezid_username or nx.ezid_conf['username']
    password = argv.ezid_password or nx.ezid_conf['password']
    shoulder = argv.shoulder or nx.ezid_conf['shoulder']
    ezid = EZID.EZIDClient(
        credentials=dict(username=username, password=password))

    # query to select all parent level objects
    documents = nx.nxql(u'''
        SELECT * FROM SampleCustomPicture, CustomFile, CustomVideo, CustomAudio
        WHERE ecm:path STARTSWITH "{}"
        AND ecm:currentLifeCycleState != "deleted"
        AND ecm:pos is NULL'''.format(argv.path[0]))

    # Peek at the first result to see whether the STARTSWITH query matched
    # anything.  BUG FIX: the previous emptiness test
    # `any(True for _ in documents)` consumed — and therefore silently
    # skipped — the first document whenever nx.nxql returned a generator.
    documents = iter(documents)
    first_doc = next(documents, None)
    if first_doc is None:
        # no children found: the user may have given the full path to a
        # single document, so retry with an exact-path query
        documents = nx.nxql(u'''
        SELECT * FROM SampleCustomPicture, CustomFile, CustomVideo, CustomAudio
        WHERE ecm:path = "{}"
        AND ecm:currentLifeCycleState != "deleted"
        AND ecm:pos is NULL'''.format(argv.path[0]))
    else:
        # put the peeked document back in front of the stream
        documents = chain([first_doc], documents)

    report = not (argv.no_noop_report)

    # main loop
    for item in documents:
        # check id for ARK
        ark = find_ark(item['properties']['ucldc_schema:identifier'])
        path = item['path']

        # if there is an ARK, check for a record in EZID
        ezid_status = None
        if ark is not None:
            ezid_status = check_ezid(ark, ezid)

        ezdata = item_erc_dict(
            item,
            owner=argv.owner,  # _owner
            status=argv.status,  # _status
            publisher=argv.publisher,  # dc.publisher
            location=argv.location  # _target
        )

        if argv.show_erc:
            print(EZID.formatAnvlFromDict(ezdata))
            print('')

        # mint: no ARK in Nuxeo and no record in EZID
        if not (ark) and not (ezid_status):
            if argv.mint:
                new_ark = ezid.mint(shoulder, ezdata)
                update_nuxeo(item, nx, new_ark)
                print('✓ mint "{}" {}'.format(path, new_ark))
            elif report:
                print('ℹ noop mint "{}"'.format(path))

        # create: ARK in Nuxeo but no record in EZID
        if ark and not (ezid_status):
            if argv.create:
                ezid.create(ark, ezdata)
                print('✓ create "{}" {}'.format(path, ark))
            elif report:
                print('ℹ noop create "{}" {}'.format(path, ark))

        # update: ARK in both Nuxeo and EZID
        if ark and ezid_status:
            owner = get_owner(ezid_status)
            if argv.update:
                ezid.update(ark, ezdata)
                print('✓ update "{}" {}'.format(path, ark))
            elif report:
                print('ℹ noop update "{}" {} {}'.format(path, ark, owner))
def main(argv=None): parser = argparse.ArgumentParser(description='nuxeo metadata via REST API') parser.add_argument('path', nargs=1, help="nuxeo document path") utils.get_common_options(parser) if argv is None: argv = parser.parse_args() nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper()) documents = nx.children(argv.path[0]) # open the workbook workbook = xlsxwriter.Workbook('qa.xlsx') header_format = workbook.add_format({ 'bold': True, }) report = workbook.add_worksheet() report.set_column( 0, 0, 10, ) report.set_column( 1, 2, 40, ) report.set_column( 3, 4, 80, ) report.write(0, 0, 'nuxeo-uid', header_format) report.write(0, 1, 'ucldc_schema:localidentifier', header_format) report.write(0, 2, 'filename', header_format) report.write(0, 3, 'nuxeo-path', header_format) report.write(0, 4, 'title', header_format) # document specified on command line root_doc = nx.get_metadata(path=argv.path[0]) report.write(1, 0, root_doc['uid']) report.write(1, 3, argv.path[0]) row = 2 for document in documents: p = document['properties'] report.write(row, 0, document['uid']) report.write(row, 1, p['ucldc_schema:localidentifier'][0]) if 'file:filename' in p: report.write(row, 2, p['file:filename']) report.write(row, 3, document['path'].replace(argv.path[0], '', 1)) report.write(row, 4, document['title']) row = row + 1 # output # path|localid|title # workbook.close()
def main(argv=None):
    """Mint a batch of ARKs with EZID and write them, one per line, to a file.

    Prompts for confirmation unless --mint is given.
    """
    parser = argparse.ArgumentParser(
        description='nxidbatch mints a batch of ARKs')
    parser.add_argument('batchsize', nargs=1, help='size of ARK batch',
                        type=int)
    ezid_group = parser.add_argument_group('minting behaviour flags')
    ezid_group.add_argument('--mint', '-m', action='store_true',
                            help='mint ARKs without prompt')
    ezid_group.add_argument('--output', '-o',
                            type=lambda x: is_valid_file(parser, x),
                            required=True)
    conf_group = parser.add_argument_group('EZID configuration and metadata')
    conf_group.add_argument('--ezid-username',
                            help='username for EZID API (overrides rcfile)',
                            type=utf8_arg)
    conf_group.add_argument('--ezid-password',
                            help='password for EZID API (overrides rc file)',
                            type=utf8_arg)
    conf_group.add_argument('--shoulder', help='shoulder (overrides rcfile)',
                            type=utf8_arg)
    conf_group.add_argument('--owner', help='set as _owner for EZID',
                            type=utf8_arg)
    conf_group.add_argument(
        '--status',
        help=
        'set as _status for EZID (default reserved, or public|unavailable)',
        default="reserved",
        type=utf8_arg)
    conf_group.add_argument('--publisher', help='set as dc.publisher for EZID',
                            type=utf8_arg)
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    # read config out of .pynuxrc file; command line flags override it
    username = argv.ezid_username or nx.ezid_conf['username']
    password = argv.ezid_password or nx.ezid_conf['password']
    shoulder = argv.shoulder or nx.ezid_conf['shoulder']
    ezid = EZID.EZIDClient(
        credentials=dict(username=username, password=password))

    if not argv.mint:
        # https://stackoverflow.com/a/26514097/1763984
        # BUG FIX: argv.batchsize is a one-element list (nargs=1); index it
        # so the prompt shows e.g. `100`, not `[100]`
        answer = raw_input(
            'Mint a batch {} of {} ARKs with prefix {} with EZID? [y/n]'.
            format(argv.output, argv.batchsize[0], shoulder))
        if not answer or answer[0].lower() != 'y':
            print('You did not indicate approval')
            exit(1)

    # `with` guarantees the output file is flushed and closed (the handle
    # was previously never closed); also removes the duplicated open()
    with open(argv.output, 'w') as output:
        for __ in range(argv.batchsize[0]):
            # mint
            new_ark = ezid.mint(shoulder)
            print(new_ark, file=output)
    if not (argv.mint):
        print('done')
def main(argv=None):
    """Roll up per-campus extent statistics into a dated summary workbook.

    Iterates the campus folders under the root path, lets forCampus()
    tally each one, and writes a summary row per campus plus a grand
    total.  Unless --no-s3-check is given, every S3 key is memorized
    first so forCampus can verify files exist on S3.
    """
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    parser.add_argument('path', nargs=1, help="root path")
    parser.add_argument('outdir', nargs=1, )
    parser.add_argument('--no-s3-check', dest='s3_check', action='store_false')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    # Optionally memorize every key in the bucket up front so the campus
    # pass can double-check that each Nuxeo file exists on S3.
    file_check = None
    s3_bytes = s3_count = 0
    if argv.s3_check:
        from boto import s3
        from boto.s3.connection import OrdinaryCallingFormat
        file_check = {}
        conn = s3.connect_to_region(
            'us-west-2', calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon')
        for count, key in enumerate(bucket.list()):
            file_check[key.name] = key.size
            if count % 50000 == 0:
                print('{0} s3 files memorized'.format(count), file=sys.stderr)
            s3_bytes += key.size
        s3_count = len(file_check)

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    campuses = nx.children(argv.path[0])

    workbook = xlsxwriter.Workbook(
        os.path.join(argv.outdir[0], '{}-summary.xlsx'.format(today)))

    # cell formats
    bold = workbook.add_format({'bold': True, })
    comma = workbook.add_format()
    comma.set_num_format('#,##0')

    sheet = workbook.add_worksheet('summary')

    # header row (columns 3/6/9 hold human-readable sizes, no header)
    header_cells = [
        (1, 'deduplicated files'),
        (2, 'deduplicated bytes'),
        (4, 'total files'),
        (5, 'total bytes'),
    ]
    if argv.s3_check:
        header_cells.append((7, 'files on S3'))
        header_cells.append((8, 'bytes on S3'))
    for col, label in header_cells:
        sheet.write(0, col, label, bold)

    # column widths
    for first_col, last_col, width in [(0, 1, 10), (2, 2, 25), (3, 4, 10),
                                       (5, 5, 25), (6, 7, 10), (8, 8, 25),
                                       (9, 9, 10)]:
        sheet.set_column(first_col, last_col, width)

    # grand totals accumulated across all campuses
    true_count = dedup_total = total_count = running_total = 0
    row = 1
    for campus in campuses:
        basename = os.path.basename(campus['path'])
        documents = nx.nxql(
            'select * from Document where ecm:path startswith"{0}"'.format(campus['path'])
        )
        (this_count, this_total, dedup_count, dedup_bytes) = forCampus(
            documents, basename, file_check, argv.outdir[0])
        sheet.write(row, 0, basename)
        sheet.write(row, 1, dedup_count, comma)
        sheet.write(row, 2, dedup_bytes, comma)
        sheet.write(row, 3, sizeof_fmt(dedup_bytes))
        sheet.write(row, 4, this_count, comma)
        sheet.write(row, 5, this_total, comma)
        sheet.write(row, 6, sizeof_fmt(this_total))
        total_count += this_count  # number of files
        running_total += this_total  # number of bytes
        true_count += dedup_count
        dedup_total += dedup_bytes  # number of bytes
        row += 1

    # grand-total row, labelled with today's date
    sheet.write(row, 0, '{}'.format(today))
    sheet.write(row, 1, true_count, comma)
    sheet.write(row, 2, dedup_total, comma)
    sheet.write(row, 3, sizeof_fmt(dedup_total))
    sheet.write(row, 4, total_count, comma)
    sheet.write(row, 5, running_total, comma)
    sheet.write(row, 6, sizeof_fmt(running_total))
    if argv.s3_check:
        sheet.write(row, 7, s3_count, comma)
        sheet.write(row, 8, s3_bytes, comma)
        sheet.write(row, 9, sizeof_fmt(s3_bytes))

    workbook.close()
def main(argv=None):
    """Mint a batch of ARKs with EZID, writing one ARK per line to --output.

    Asks for confirmation first unless --mint is supplied.
    """
    parser = argparse.ArgumentParser(
        description='nxidbatch mints a batch of ARKs')
    parser.add_argument(
        'batchsize', nargs=1, help='size of ARK batch', type=int)
    ezid_group = parser.add_argument_group('minting behaviour flags')
    ezid_group.add_argument(
        '--mint', '-m', action='store_true', help='mint ARKs without prompt')
    ezid_group.add_argument(
        '--output',
        '-o',
        type=lambda x: is_valid_file(parser, x),
        required=True)
    conf_group = parser.add_argument_group('EZID configuration and metadata')
    conf_group.add_argument(
        '--ezid-username',
        help='username for EZID API (overrides rcfile)',
        type=utf8_arg)
    conf_group.add_argument(
        '--ezid-password',
        help='password for EZID API (overrides rc file)',
        type=utf8_arg)
    conf_group.add_argument(
        '--shoulder', help='shoulder (overrides rcfile)', type=utf8_arg)
    conf_group.add_argument(
        '--owner', help='set as _owner for EZID', type=utf8_arg)
    conf_group.add_argument(
        '--status',
        help=
        'set as _status for EZID (default reserved, or public|unavailable)',
        default="reserved",
        type=utf8_arg)
    conf_group.add_argument(
        '--publisher', help='set as dc.publisher for EZID', type=utf8_arg)
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    # read config out of .pynuxrc file; command line flags override it
    username = argv.ezid_username or nx.ezid_conf['username']
    password = argv.ezid_password or nx.ezid_conf['password']
    shoulder = argv.shoulder or nx.ezid_conf['shoulder']
    ezid = EZID.EZIDClient(credentials=dict(
        username=username, password=password))

    if not argv.mint:
        # https://stackoverflow.com/a/26514097/1763984
        # BUG FIX: argv.batchsize is a one-element list (nargs=1); index it
        # so the prompt shows e.g. `100` rather than `[100]`
        answer = raw_input(
            'Mint a batch {} of {} ARKs with prefix {} with EZID? [y/n]'.
            format(argv.output, argv.batchsize[0], shoulder))
        if not answer or answer[0].lower() != 'y':
            print('You did not indicate approval')
            exit(1)

    # `with` closes the file reliably (previously the handle leaked) and
    # collapses the duplicated open() from both branches
    with open(argv.output, 'w') as output:
        for __ in range(argv.batchsize[0]):
            # mint
            new_ark = ezid.mint(shoulder)
            print(new_ark, file=output)
    if not (argv.mint):
        print('done')
def main(argv=None):
    """Sync ARKs between top-level Nuxeo objects and EZID.

    Behaviour per parent-level document under `path`:
      --mint   mint and bind a new ARK when Nuxeo records none
      --create create the EZID record when Nuxeo has an ARK EZID lacks
      --update update the EZID record when both sides have the ARK
    With none of those flags it reports the would-be actions (noops)
    unless --no-noop-report is given.
    """
    # local import: only needed for the generator peek below
    from itertools import chain

    parser = argparse.ArgumentParser(
        description=
        'nxid finds top level objects in Nuxeo and syncs them up with EZID')
    parser.add_argument(
        'path', nargs=1, help='nuxeo path (folder or object)', type=utf8_arg)
    ezid_group = parser.add_argument_group('minting behaviour flags')
    ezid_group.add_argument(
        '--mint',
        '-m',
        action='store_true',
        help='when an ARK is missing, mint and bind new ARK in EZID')
    ezid_group.add_argument(
        '--create',
        '-c',
        action='store_true',
        help='when an ARK is found in Nuxeo but not EZID, create EZID')
    ezid_group.add_argument(
        '--update',
        '-u',
        action='store_true',
        help='when an ARK is found in Nuxeo and EZID, update EZID')
    ezid_group.add_argument(
        '--no-noop-report',
        action='store_true',
        help='override default behaviour of reporting on noops')
    ezid_group.add_argument(
        '--show-erc',
        action='store_true',
        help='show ANVL record that will be sent to EZID')
    conf_group = parser.add_argument_group('EZID configuration and metadata')
    conf_group.add_argument(
        '--ezid-username',
        help='username for EZID API (overrides rcfile)',
        type=utf8_arg)
    conf_group.add_argument(
        '--ezid-password',
        help='password for EZID API (overrides rc file)',
        type=utf8_arg)
    conf_group.add_argument(
        '--shoulder', help='shoulder (overrides rcfile)', type=utf8_arg)
    conf_group.add_argument(
        '--owner', help='set as _owner for EZID', type=utf8_arg)
    conf_group.add_argument(
        '--status',
        help='set as _status for EZID (public|reserved|unavailable)',
        type=utf8_arg)
    conf_group.add_argument(
        '--publisher', help='set as dc.publisher for EZID', type=utf8_arg)
    conf_group.add_argument(
        '--location', help='set location URL prefix for EZID', type=utf8_arg)
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    # read config out of .pynuxrc file; command line flags override it
    username = argv.ezid_username or nx.ezid_conf['username']
    password = argv.ezid_password or nx.ezid_conf['password']
    shoulder = argv.shoulder or nx.ezid_conf['shoulder']
    ezid = EZID.EZIDClient(
        credentials=dict(username=username, password=password))

    # query to select all parent level objects
    documents = nx.nxql(u'''
        SELECT * FROM SampleCustomPicture, CustomFile, CustomVideo, CustomAudio
        WHERE ecm:path STARTSWITH "{}"
        AND ecm:currentLifeCycleState != "deleted"
        AND ecm:pos is NULL'''.format(argv.path[0]))

    # Peek at the first result to test for emptiness.  BUG FIX: the old
    # `any(True for _ in documents)` check consumed the first document
    # when nx.nxql returned a generator, so that document was never
    # processed by the loop below.
    documents = iter(documents)
    first_doc = next(documents, None)
    if first_doc is None:
        # nothing under that path: the user may have given the full path
        # to a single document, so retry with an exact-path query
        documents = nx.nxql(u'''
        SELECT * FROM SampleCustomPicture, CustomFile, CustomVideo, CustomAudio
        WHERE ecm:path = "{}"
        AND ecm:currentLifeCycleState != "deleted"
        AND ecm:pos is NULL'''.format(argv.path[0]))
    else:
        # re-attach the peeked document to the front of the stream
        documents = chain([first_doc], documents)

    report = not (argv.no_noop_report)

    # main loop
    for item in documents:
        # check id for ARK
        ark = find_ark(item['properties']['ucldc_schema:identifier'])
        path = item['path']

        # if there is an ARK, check for a record in EZID
        ezid_status = None
        if ark is not None:
            ezid_status = check_ezid(ark, ezid)

        ezdata = item_erc_dict(
            item,
            owner=argv.owner,  # _owner
            status=argv.status,  # _status
            publisher=argv.publisher,  # dc.publisher
            location=argv.location  # _target
        )

        if argv.show_erc:
            print(EZID.formatAnvlFromDict(ezdata))
            print('')

        # mint: no ARK anywhere yet
        if not (ark) and not (ezid_status):
            if argv.mint:
                new_ark = ezid.mint(shoulder, ezdata)
                update_nuxeo(item, nx, new_ark)
                print('✓ mint "{}" {}'.format(path, new_ark))
            elif report:
                print('ℹ noop mint "{}"'.format(path))

        # create: ARK recorded in Nuxeo only
        if ark and not (ezid_status):
            if argv.create:
                ezid.create(ark, ezdata)
                print('✓ create "{}" {}'.format(path, ark))
            elif report:
                print('ℹ noop create "{}" {}'.format(path, ark))

        # update: ARK known to both systems
        if ark and ezid_status:
            owner = get_owner(ezid_status)
            if argv.update:
                ezid.update(ark, ezdata)
                print('✓ update "{}" {}'.format(path, ark))
            elif report:
                print('ℹ noop update "{}" {} {}'.format(path, ark, owner))