def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Tasks write API.
    if args['--cmd']:
        data = get_args_dict(args['--data'], query_string=True)
        r = session.submit_task(args['<identifier>'],
                                args['--cmd'],
                                comment=args['--comment'],
                                priority=data.get('priority'),
                                data=data)
        j = r.json()
        if j.get('success'):
            print('success: {}'.format(j.get('value', dict()).get('log')))
            sys.exit(0)
        else:
            print('error: {}'.format(j.get('error')))
            sys.exit(1)

    # Tasks read API.
    params = get_args_dict(args['--parameter'], query_string=True)
    if args['<identifier>']:
        _params = dict(identifier=args['<identifier>'], catalog=1, history=1)
        _params.update(params)
        params = _params
    elif args['--get-task-log']:
        log = session.get_task_log(args['--get-task-log'], params)
        if six.PY2:
            print(log.encode('utf-8'))
        else:
            print(log)
        sys.exit(0)

    queryable_params = [
        'identifier',
        'task_id',
        'server',
        'cmd',
        'args',
        'submitter',
        'priority',
        'wait_admin',
        'submittime',
    ]

    if not args['<identifier>'] \
            and not params.get('task_id'):
        params.update(dict(catalog=1, history=0))

    if not any(x in params for x in queryable_params):
        _params = dict(submitter='*****@*****.**', catalog=1, history=0, summary=0)
        _params.update(params)
        params = _params

    for t in session.get_tasks(params=params):
        print(t.json())
        sys.stdout.flush()
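# Hedged sketch of the parameter-precedence idiom above: defaults are built
# first, then user-supplied --parameter values win via dict.update(). The
# values here are illustrative only.
params = {'history': 1}  # as parsed from --parameter
_params = dict(identifier='some-item', catalog=1, history=0)  # defaults
_params.update(params)
params = _params
assert params == {'identifier': 'some-item', 'catalog': 1, 'history': 1}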
def main(argv, session):
    args = docopt(__doc__, argv=argv)
    params = get_args_dict(args['--parameter'])

    row_types = {
        -1: 'done',
        0: 'green',
        1: 'blue',
        2: 'red',
        9: 'brown',
    }

    task_type = None
    if args['--green-rows']:
        task_type = 'green'
    elif args['--blue-rows']:
        task_type = 'blue'
    elif args['--red-rows']:
        task_type = 'red'

    try:
        try:
            if args['<identifier>']:
                tasks = session.get_tasks(identifier=args['<identifier>'],
                                          task_type=task_type,
                                          params=params)
            elif args['--get-task-log']:
                task = session.get_tasks(task_ids=args['--get-task-log'],
                                         params=params)
                if task:
                    log = task[0].task_log()
                    sys.exit(print(log))
                else:
                    print('error retrieving task-log '
                          'for {0}\n'.format(args['--get-task-log']),
                          file=sys.stderr)
                    sys.exit(1)
            elif args['--task']:
                tasks = session.get_tasks(task_ids=args['--task'], params=params)
            else:
                tasks = session.get_tasks(task_type=task_type, params=params)
        except ValueError as exc:
            print('error: unable to parse JSON. have you run `ia configure`?',
                  file=sys.stderr)
            sys.exit(1)

        for t in tasks:
            task_info = [
                t.identifier,
                t.task_id,
                t.server,
                t.time,
                t.command,
                row_types[t.row_type],
            ]
            if args['--verbose']:
                # Parse task args and append to the task_info list.
                targs = '\t'.join(['{0}={1}'.format(k, v) for (k, v) in t.args.items()])
                task_info += [t.submitter, targs]
            print('\t'.join([str(x) for x in task_info]))
    except NameError as exc:
        print('error: {0}'.format(exc), file=sys.stderr)
        sys.exit(1)
def main(argv, session=None):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        six.text_type: Use(bool),
        '<query>': Use(lambda x: ' '.join(x)),
        '--parameters': Use(lambda x: get_args_dict(x, query_string=True)),
        '--header': Or(None, And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--sort': list,
        '--field': list,
        '--timeout': Use(lambda x: float(x[0]),
                         error='--timeout must be integer or float.')
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    # Support comma separated values.
    fields = list(chain.from_iterable([x.split(',') for x in args['--field']]))
    sorts = list(chain.from_iterable([x.split(',') for x in args['--sort']]))

    r_kwargs = dict(
        headers=args['--header'],
        timeout=args['--timeout'],
    )

    search = session.search_items(args['<query>'],
                                  fields=fields,
                                  sorts=sorts,
                                  params=args['--parameters'],
                                  request_kwargs=r_kwargs)

    try:
        if args['--num-found']:
            print('{0}'.format(search.num_found))
            sys.exit(0)
        for result in search:
            if args['--itemlist']:
                print(result.get('identifier', ''))
            else:
                j = json.dumps(result)
                print(j)
    except ValueError as e:
        print('error: {0}'.format(e), file=sys.stderr)
    except ConnectTimeout as exc:
        print('error: Request timed out. Increase the --timeout and try again.',
              file=sys.stderr)
        sys.exit(1)
    except AuthenticationError as exc:
        print('error: {}'.format(exc), file=sys.stderr)
        sys.exit(1)
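# Minimal sketch of the docopt + Schema validation pattern shared by these
# entry points (Schema and Use come from the third-party `schema` package;
# the toy argument dict below is made up). Literal keys like '<query>' are
# matched first; the generic string-type key coerces every remaining flag
# to bool.
from schema import Schema, Use

raw_args = {'--num-found': 0, '<query>': ['nasa', 'apollo'], '--field': []}
s = Schema({
    str: Use(bool),                          # catch-all for plain flags
    '<query>': Use(lambda x: ' '.join(x)),   # docopt yields a token list
    '--field': list,
})
args = s.validate(raw_args)
assert args == {'--num-found': False, '<query>': 'nasa apollo', '--field': []}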
def test_get_args_dict_query_string():
    test_input = ['a=b,foo&c=d&e=f', 'foo:bar ']
    test_output = {
        'a': 'b,foo',
        'c': 'd',
        'e': 'f',
        'foo': 'bar ',
    }
    args_dict = get_args_dict(test_input, query_string=True)
    for key, value in args_dict.items():
        assert test_output[key] == value
def test_get_args_dict():
    test_input = [
        'collection:test_collection',
        "description: Attention: multiple colon's",
        'unicode_test:தமிழ்',
        'subject:subject1',
        'subject:subject2',
    ]
    test_output = {
        'collection': 'test_collection',
        'description': " Attention: multiple colon's",
        'unicode_test': 'தமிழ்',
        'subject': ['subject1', 'subject2'],
    }
    args_dict = get_args_dict(test_input)
    for key, value in args_dict.items():
        assert test_output[key] == value
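# Usage sketch grounded in the two tests above (assumes the get_args_dict
# under test, e.g. importable from internetarchive.cli.argparser):
# 'key:value' strings become a flat dict, repeated keys collect into a
# list, and with query_string=True inputs parse as 'key=value&key=value'.
# The subject values here are illustrative.
from internetarchive.cli.argparser import get_args_dict

md = get_args_dict(['collection:test_collection', 'subject:a', 'subject:b'])
assert md == {'collection': 'test_collection', 'subject': ['a', 'b']}

params = get_args_dict(['a=b,foo&c=d'], query_string=True)
assert params == {'a': 'b,foo', 'c': 'd'}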
def main(argv, session=None):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        six.text_type: Use(bool),
        '<query>': Use(lambda x: ' '.join(x)),
        '--parameters': Use(lambda x: get_args_dict(x, query_string=True)),
        '--sort': list,
        '--field': Use(lambda x: ['identifier'] if not x and args['--itemlist'] else x),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    # Support comma separated values.
    fields = list(chain.from_iterable([x.split(',') for x in args['--field']]))
    sorts = list(chain.from_iterable([x.split(',') for x in args['--sort']]))

    search = search_items(args['<query>'],
                          fields=fields,
                          sorts=sorts,
                          params=args['--parameters'])
    if args['--num-found']:
        print('{0}'.format(search.num_found))
        sys.exit(0)

    try:
        for result in search:
            if args['--itemlist']:
                print(result.get('identifier', ''))
            else:
                j = json.dumps(result)
                print(j)
    except ValueError as e:
        print('error: {0}'.format(e), file=sys.stderr)
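# The comma-splitting idiom above, in isolation: repeated --field/--sort
# flags, each possibly comma separated, flatten into a single list. The
# input values are illustrative.
from itertools import chain

raw_fields = ['identifier,title', 'date']
fields = list(chain.from_iterable([x.split(',') for x in raw_fields]))
assert fields == ['identifier', 'title', 'date']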
def test_get_args_dict():
    test_input = [
        'collection:test_collection',
        "description: Attention: multiple colons",
        'unicode_test:தமிழ்',
        'subject:subject1, subject1',
        'subject:subject2',
        'subject:subject3; subject3',
    ]
    test_output = {
        'collection': 'test_collection',
        'description': " Attention: multiple colons",
        'unicode_test': 'தமிழ்',
        'subject': ['subject1, subject1', 'subject2', 'subject3; subject3'],
    }
    args_dict = get_args_dict(test_input)
    for key, value in args_dict.items():
        assert test_output[key] == value
def test_get_args_dict():
    test_input = [
        'collection:test_collection',
        "description: Attention: multiple colons",
        'unicode_test:தமிழ்',
        'subject:subject1',
        'subject:subject2',
        'a=b&a=c&a=d',
        'b = a , b = c , b = d',
        'c = a ; c = b ; c = d',
    ]
    test_output = {
        'collection': 'test_collection',
        'description': "Attention: multiple colons",
        'unicode_test': 'தமிழ்',
        'subject': ['subject1', 'subject2'],
        'a': ['b', 'c', 'd'],
        'b': ['a', 'c', 'd'],
        'c': ['a', 'b', 'd'],
    }
    args_dict = get_args_dict(test_input)
    for key, value in args_dict.items():
        assert test_output[key] == value
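# Hedged demo of the newer separator handling asserted above: '&', ',' and
# ';' joined repeats of a key collapse into a list with whitespace stripped.
# Assumes the same get_args_dict this version of the test exercises;
# earlier versions (see the two prior test variants) behave differently.
from internetarchive.cli.argparser import get_args_dict

parsed = get_args_dict(['a=b&a=c&a=d', 'b = a , b = c , b = d'])
assert parsed == {'a': ['b', 'c', 'd'], 'b': ['a', 'c', 'd']}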
def main(argv, session):
    args = docopt(__doc__, argv=argv)
    params = get_args_dict(args["--parameter"])

    row_types = {-1: "done", 0: "green", 1: "blue", 2: "red", 9: "brown"}

    task_type = None
    if args["--green-rows"]:
        task_type = "green"
    elif args["--blue-rows"]:
        task_type = "blue"
    elif args["--red-rows"]:
        task_type = "red"

    try:
        if args["<identifier>"]:
            tasks = get_tasks(identifier=args["<identifier>"],
                              task_type=task_type,
                              params=params)
        elif args["--get-task-log"]:
            task = get_tasks(task_ids=args["--get-task-log"], params=params)
            if task:
                log = task[0].task_log()
                sys.stdout.write(log)
            else:
                sys.stderr.write(
                    "error retrieving task-log for {0}\n".format(args["--get-task-log"]))
                sys.exit(1)
            sys.exit(0)
        elif args["--task"]:
            tasks = get_tasks(task_ids=args["--task"], params=params)
        else:
            tasks = get_tasks(task_type=task_type, params=params)

        for t in tasks:
            task_info = [t.identifier, t.task_id, t.server, t.time, t.command,
                         row_types[t.row_type]]
            if args["--verbose"]:
                # Parse task args and append to the task_info list.
                targs = "\t".join(["{0}={1}".format(k, v) for (k, v) in t.args.items()])
                task_info += [t.submitter, targs]
            sys.stdout.write("\t".join([str(x) for x in task_info]) + "\n")
    except NameError as exc:
        sys.stderr.write("error: {0}".format(exc))
        sys.exit(1)
def main(argv, session):
    args = docopt(__doc__, argv=argv)
    src_path = args['<src-identifier>/<src-file>']
    dest_path = args['<dest-identifier>/<dest-file>']

    headers = get_args_dict(args['--header'])
    # Add keep-old-version by default.
    if 'x-archive-keep-old-version' not in args['--header']:
        headers['x-archive-keep-old-version'] = '1'

    # First we use ia_copy; prep argv for ia_copy.
    argv.pop(0)
    argv = ['copy'] + argv

    # Call ia_copy.
    r, src_file = ia_copy.main(argv, session, cmd='move')

    dr = src_file.delete(headers=headers, cascade_delete=True)
    if dr.status_code == 204:
        print('success: moved {} to {}'.format(src_path, dest_path))
        sys.exit(0)
    print('error: {}'.format(dr.content))
def main(argv, session=None):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema(
        {
            six.text_type: Use(bool),
            "<query>": Use(lambda x: " ".join(x)),
            "--parameters": Use(lambda x: get_args_dict(x, query_string=True)),
            "--sort": list,
            "--field": list,
        }
    )
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print("{0}\n{1}".format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    # Support comma separated values.
    fields = list(chain.from_iterable([x.split(",") for x in args["--field"]]))
    sorts = list(chain.from_iterable([x.split(",") for x in args["--sort"]]))

    search = session.search_items(args["<query>"],
                                  fields=fields,
                                  sorts=sorts,
                                  params=args["--parameters"])
    try:
        if args["--num-found"]:
            print("{0}".format(search.num_found))
            sys.exit(0)
        for result in search:
            if args["--itemlist"]:
                print(result.get("identifier", ""))
            else:
                j = json.dumps(result)
                print(j)
    except ValueError as e:
        print("error: {0}".format(e), file=sys.stderr)
def main(argv, session):
    args = docopt(__doc__, argv=argv)
    src_path = args['<src-identifier>/<src-file>']
    dest_path = args['<dest-identifier>/<dest-file>']

    # Validate args.
    s = Schema({
        str: Use(bool),
        '--metadata': list,
        '--header': list,
        '<src-identifier>/<src-file>': And(str, lambda x: '/' in x,
            error='Source not formatted correctly. See usage example.'),
        '<dest-identifier>/<dest-file>': And(str, lambda x: '/' in x,
            error='Destination not formatted correctly. See usage example.'),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    headers = get_args_dict(args['--header'])
    # Add keep-old-version by default.
    if 'x-archive-keep-old-version' not in args['--header']:
        headers['x-archive-keep-old-version'] = '1'

    # First we use ia_copy; prep argv for ia_copy.
    argv.pop(0)
    argv = ['copy'] + argv

    # Call ia_copy.
    r, src_file = ia_copy.main(argv, session, cmd='move')

    dr = src_file.delete(headers=headers, cascade_delete=True)
    if dr.status_code == 204:
        print('success: moved {} to {}'.format(src_path, dest_path))
        sys.exit(0)
    print('error: {}'.format(dr.content))
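# Note on the default-header check above: args['--header'] is the raw list
# of 'key:value' strings, and `in` on a list tests whole-element equality,
# so only a bare 'x-archive-keep-old-version' element would suppress the
# default. Illustrative value:
raw_header_args = ['x-archive-keep-old-version:0']
assert 'x-archive-keep-old-version' not in raw_header_args  # no exact match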
def main(argv, session=None):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        six.text_type: Use(bool),
        '<query>': Use(lambda x: ' '.join(x)),
        '--parameters': Use(lambda x: get_args_dict(x)),
        '--sort': list,
        '--field': Use(lambda x: ['identifier'] if not x and args['--itemlist'] else x),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    # Format sort parameters.
    for i, field in enumerate(args['--sort']):
        key = 'sort[{0}]'.format(i)
        args['--parameters'][key] = field.strip().replace(':', ' ')

    search = search_items(args['<query>'],
                          fields=args['--field'],
                          params=args['--parameters'])
    if args['--num-found']:
        print('{0}'.format(search.num_found))
        sys.exit(0)

    for result in search:
        if args['--itemlist']:
            print(result.get('identifier', ''))
        else:
            j = json.dumps(result)
            print(j)
def main(argv, session: ArchiveSession) -> None:
    args = docopt(__doc__, argv=argv)

    # Validation error messages.
    destdir_msg = '--destdir must be a valid path to a directory.'
    itemlist_msg = '--itemlist must be a valid path to an existing file.'

    # Validate args.
    s = Schema({
        str: Use(bool),
        '--destdir': Or([], And(Use(lambda d: d[0]), dir_exists), error=destdir_msg),
        '--format': list,
        '--glob': Use(lambda item: item[0] if item else None),
        '<file>': list,
        '--search': Or(str, None),
        '--itemlist': Or(None, And(lambda f: os.path.isfile(f)), error=itemlist_msg),
        '<identifier>': Or(str, None),
        '--retries': Use(lambda x: x[0]),
        '--search-parameters': Use(lambda x: get_args_dict(x, query_string=True)),
        '--on-the-fly': Use(bool),
        '--no-change-timestamp': Use(bool),
        '--download-history': Use(bool),
        '--parameters': Use(lambda x: get_args_dict(x, query_string=True)),
    })

    try:
        args = s.validate(args)
        if args['--glob'] and args['--format']:
            raise SchemaError(None, '--glob and --format cannot be used together.')
    except SchemaError as exc:
        print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr)
        sys.exit(1)

    retries = int(args['--retries'])

    ids: list[File | str] | Search | TextIO
    if args['--itemlist']:
        with open(args['--itemlist']) as fp:
            ids = [x.strip() for x in fp]
        total_ids = len(ids)
    elif args['--search']:
        try:
            _search = session.search_items(args['--search'],
                                           params=args['--search-parameters'])
            total_ids = _search.num_found
            if total_ids == 0:
                print(f'error: the query "{args["--search"]}" returned no results',
                      file=sys.stderr)
                sys.exit(1)
            ids = _search
        except ValueError as e:
            print(f'error: {e}', file=sys.stderr)
            sys.exit(1)

    # Download specific files.
    if args['<identifier>'] and args['<identifier>'] != '-':
        if '/' in args['<identifier>']:
            identifier = args['<identifier>'].split('/')[0]
            files = ['/'.join(args['<identifier>'].split('/')[1:])]
        else:
            identifier = args['<identifier>']
            files = args['<file>']
        total_ids = 1
        ids = [identifier]
    elif args['<identifier>'] == '-':
        total_ids = 1
        ids = sys.stdin
        files = None
    else:
        files = None

    errors = []
    for i, identifier in enumerate(ids):
        if args['--stdout']:
            item = session.get_item(identifier)
            f = list(item.get_files(args['<file>']))
            try:
                assert len(f) == 1
            except AssertionError:
                print(f'error: {identifier}/{args["<file>"][0]} does not exist!',
                      file=sys.stderr)
                sys.exit(1)
            stdout_buf = sys.stdout.buffer
            f[0].download(retries=args['--retries'],
                          fileobj=stdout_buf,
                          params=args['--parameters'])
            sys.exit(0)
        try:
            identifier = identifier.strip()
        except AttributeError:
            identifier = identifier.get('identifier')
        if total_ids > 1:
            item_index = f'{i + 1}/{total_ids}'
        else:
            item_index = None

        try:
            item = session.get_item(identifier)
        except Exception as exc:
            print(f'{identifier}: failed to retrieve item metadata - errors',
                  file=sys.stderr)
            if 'You are attempting to make an HTTPS' in str(exc):
                print(f'\n{exc}', file=sys.stderr)
                sys.exit(1)
            else:
                continue

        # Otherwise, download the entire item.
        ignore_history_dir = not args['--download-history']
        _errors = item.download(
            files=files,
            formats=args['--format'],
            glob_pattern=args['--glob'],
            dry_run=args['--dry-run'],
            verbose=not args['--quiet'],
            ignore_existing=args['--ignore-existing'],
            checksum=args['--checksum'],
            destdir=args['--destdir'],
            no_directory=args['--no-directories'],
            retries=retries,
            item_index=item_index,
            ignore_errors=True,
            on_the_fly=args['--on-the-fly'],
            no_change_timestamp=args['--no-change-timestamp'],
            params=args['--parameters'],
            ignore_history_dir=ignore_history_dir,
        )
        if _errors:
            errors.append(_errors)

    if errors:
        # TODO: add option for a summary/report.
        sys.exit(1)
    else:
        sys.exit(0)
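# Hypothetical invocation sketch for the download entry point above. argv
# mirrors the `ia download` CLI; the identifier and glob pattern are made
# up, and main() exits via sys.exit when done.
from internetarchive import get_session

session = get_session()
main(['download', 'some-item', '--glob', '*.pdf'], session)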
def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        six.text_type: bool,
        '<identifier>': list,
        '--modify': list,
        '--append': list,
        '--append-list': list,
        '--remove': list,
        '--spreadsheet': Or(None, And(lambda f: os.path.exists(f),
                            error='<file> should be a readable file or directory.')),
        '--target': Or(None, str),
        '--priority': Or(None, Use(int, error='<priority> should be an integer.')),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    formats = set()
    responses = []

    for i, identifier in enumerate(args['<identifier>']):
        item = session.get_item(identifier)

        # Check existence of item.
        if args['--exists']:
            if item.exists:
                responses.append(True)
                print('{0} exists'.format(identifier))
            else:
                responses.append(False)
                print('{0} does not exist'.format(identifier), file=sys.stderr)
            if (i + 1) == len(args['<identifier>']):
                if all(r is True for r in responses):
                    sys.exit(0)
                else:
                    sys.exit(1)

        # Modify metadata.
        elif args['--modify'] or args['--append'] or args['--append-list'] \
                or args['--remove']:
            if args['--modify']:
                metadata_args = args['--modify']
            elif args['--append']:
                metadata_args = args['--append']
            elif args['--append-list']:
                metadata_args = args['--append-list']
            if args['--remove']:
                metadata_args = args['--remove']
            try:
                metadata = get_args_dict(metadata_args)
                if any('/' in k for k in metadata):
                    metadata = get_args_dict_many_write(metadata)
            except ValueError:
                print("error: The value of --modify, --remove, --append or --append-list "
                      "is invalid. It must be formatted as: --modify=key:value",
                      file=sys.stderr)
                sys.exit(1)

            if args['--remove']:
                responses.append(remove_metadata(item, metadata, args))
            else:
                responses.append(modify_metadata(item, metadata, args))
            if (i + 1) == len(args['<identifier>']):
                if all(r.status_code == 200 for r in responses):
                    sys.exit(0)
                else:
                    for r in responses:
                        if r.status_code == 200:
                            continue
                        # We still want to exit 0 if the non-200 is a
                        # "no changes to xml" error.
                        elif 'no changes' in r.content.decode('utf-8'):
                            continue
                        else:
                            sys.exit(1)

        # Get metadata.
        elif args['--formats']:
            for f in item.get_files():
                formats.add(f.format)
            if (i + 1) == len(args['<identifier>']):
                print('\n'.join(formats))

        # Dump JSON to stdout.
        else:
            metadata = json.dumps(item.item_metadata)
            print(metadata)

    # Edit metadata for items in bulk, using a spreadsheet as input.
    if args['--spreadsheet']:
        if not args['--priority']:
            args['--priority'] = -5
        with io.open(args['--spreadsheet'], 'rU', newline='', encoding='utf-8') as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            responses = []
            for row in spreadsheet:
                if not row['identifier']:
                    continue
                item = session.get_item(row['identifier'])
                if row.get('file'):
                    del row['file']
                metadata = dict((k.lower(), v) for (k, v) in row.items() if v)
                responses.append(modify_metadata(item, metadata, args))

            if all(r.status_code == 200 for r in responses):
                sys.exit(0)
            else:
                for r in responses:
                    if r.status_code == 200:
                        continue
                    # We still want to exit 0 if the non-200 is a
                    # "no changes to xml" error.
                    elif 'no changes' in r.content.decode('utf-8'):
                        continue
                    else:
                        sys.exit(1)
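# The spreadsheet branch above lowercases column names and drops empty
# cells before posting metadata; a row like this one (values made up):
row = {'identifier': 'some-item', 'Title': 'An example', 'creator': ''}
metadata = dict((k.lower(), v) for (k, v) in row.items() if v)
assert metadata == {'identifier': 'some-item', 'title': 'An example'}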
def main(argv, session):
    args = docopt(__doc__, argv=argv)
    params = get_args_dict(args['--parameter'], query_string=True)

    row_types = {
        -1: 'done',
        0: 'green',
        1: 'blue',
        2: 'red',
        9: 'brown',
    }

    task_type = None
    if args['--green-rows']:
        task_type = 'green'
    elif args['--blue-rows']:
        task_type = 'blue'
    elif args['--red-rows']:
        task_type = 'red'

    try:
        try:
            if args['<identifier>']:
                tasks = session.get_tasks(identifier=args['<identifier>'],
                                          task_type=task_type,
                                          params=params)
            elif args['--get-task-log']:
                task = session.get_tasks(task_ids=args['--get-task-log'],
                                         params=params)
                if task:
                    log = task[0].task_log()
                    sys.exit(print(log))
                else:
                    print('error retrieving task-log '
                          'for {0}\n'.format(args['--get-task-log']),
                          file=sys.stderr)
                    sys.exit(1)
            elif args['--task']:
                tasks = session.get_tasks(task_ids=args['--task'], params=params)
            else:
                tasks = session.get_tasks(task_type=task_type, params=params)
        except ValueError as exc:
            print('error: unable to parse JSON. have you run `ia configure`?',
                  file=sys.stderr)
            sys.exit(1)

        for t in tasks:
            task_info = [
                t.identifier,
                t.task_id,
                t.server,
                t.time,
                t.command,
                row_types[t.row_type],
            ]
            if args['--verbose']:
                # Parse task args and append to the task_info list.
                targs = '\t'.join(
                    ['{0}={1}'.format(k, v) for (k, v) in t.args.items()])
                task_info += [t.submitter, targs]
            print('\t'.join([str(x) for x in task_info]))
    except NameError as exc:
        print('error: {0}'.format(exc), file=sys.stderr)
        sys.exit(1)
def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        six.text_type: bool,
        '<identifier>': list,
        '--modify': list,
        '--append': list,
        '--spreadsheet': Or(None, And(lambda f: os.path.exists(f),
                            error='<file> should be a readable file or directory.')),
        '--target': Or(None, str),
        '--priority': Or(None, Use(int, error='<priority> should be an integer.')),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    formats = set()
    responses = []

    for i, identifier in enumerate(args['<identifier>']):
        item = session.get_item(identifier)

        # Check existence of item.
        if args['--exists']:
            if item.exists:
                responses.append(True)
                print('{0} exists'.format(identifier))
            else:
                responses.append(False)
                print('{0} does not exist'.format(identifier), file=sys.stderr)
            if (i + 1) == len(args['<identifier>']):
                if all(r is True for r in responses):
                    sys.exit(0)
                else:
                    sys.exit(1)

        # Modify metadata.
        elif args['--modify'] or args['--append']:
            metadata_args = args['--modify'] if args['--modify'] else args['--append']
            metadata = get_args_dict(metadata_args)
            responses.append(modify_metadata(item, metadata, args))
            if (i + 1) == len(args['<identifier>']):
                if all(r.status_code == 200 for r in responses):
                    sys.exit(0)
                else:
                    sys.exit(1)

        # Get metadata.
        elif args['--formats']:
            for f in item.get_files():
                formats.add(f.format)
            if (i + 1) == len(args['<identifier>']):
                print('\n'.join(formats))

        # Dump JSON to stdout.
        else:
            metadata = json.dumps(item.item_metadata)
            print(metadata)

    # Edit metadata for items in bulk, using a spreadsheet as input.
    if args['--spreadsheet']:
        if not args['--priority']:
            args['--priority'] = -5
        with open(args['--spreadsheet'], 'rU') as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            responses = []
            for row in spreadsheet:
                if not row['identifier']:
                    continue
                item = session.get_item(row['identifier'])
                if row.get('file'):
                    del row['file']
                metadata = dict((k.lower(), v) for (k, v) in row.items() if v)
                responses.append(modify_metadata(item, metadata, args))

            if all(r.status_code == 200 for r in responses):
                sys.exit(0)
            else:
                sys.exit(1)

    sys.exit(0)
def main(argv, session):
    if six.PY2:
        args = docopt(__doc__.encode('utf-8'), argv=argv)
    else:
        args = docopt(__doc__, argv=argv)
    ERRORS = False

    # Validate args.
    s = Schema({
        str: Use(bool),
        '<identifier>': Or(None, And(str, validate_ia_identifier,
            error=('<identifier> should be between 3 and 80 characters in length, and '
                   'can only contain alphanumeric characters, periods ".", '
                   'underscores "_", or dashes "-". However, <identifier> cannot begin '
                   'with periods, underscores, or dashes.'))),
        '<file>': And(
            Use(lambda l: l if not six.PY2 else convert_str_list_to_unicode(l)),
            And(lambda f: all(os.path.exists(x) for x in f if x != '-'),
                error='<file> should be a readable file or directory.'),
            And(lambda f: False if f == ['-'] and not args['--remote-name'] else True,
                error='--remote-name must be provided when uploading from stdin.')),
        '--remote-name': Or(None,
            Use(lambda x: x.decode(sys.getfilesystemencoding()) if six.PY2 else x)),
        '--spreadsheet': Or(None, os.path.isfile,
            error='--spreadsheet should be a readable file.'),
        '--metadata': Or(None, And(Use(get_args_dict), dict),
            error='--metadata must be formatted as --metadata="key:value"'),
        '--header': Or(None, And(Use(get_args_dict), dict),
            error='--header must be formatted as --header="key:value"'),
        '--retries': Use(lambda x: int(x[0]) if x else 0),
        '--sleep': Use(lambda l: int(l[0]), error='--sleep value must be an integer.'),
        '--size-hint': Or(Use(lambda l: str(l[0]) if l else None), int, None,
            error='--size-hint value must be an integer.'),
        '--status-check': bool,
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    # Make sure the collection being uploaded to exists.
    collection_id = args['--metadata'].get('collection')
    if collection_id and not args['--no-collection-check'] and not args['--status-check']:
        if isinstance(collection_id, list):
            collection_id = collection_id[0]
        collection = session.get_item(collection_id)
        if not collection.exists:
            sys.stderr.write(
                'You must upload to a collection that exists. '
                '"{0}" does not exist.\n{1}\n'.format(collection_id,
                                                      printable_usage(__doc__)))
            sys.exit(1)

    # Status check.
    if args['--status-check']:
        if session.s3_is_overloaded():
            print('warning: {0} is over limit, and not accepting requests. '
                  'Expect 503 SlowDown errors.'.format(args['<identifier>']),
                  file=sys.stderr)
            sys.exit(1)
        else:
            print('success: {0} is accepting requests.'.format(args['<identifier>']))
            sys.exit()

    elif args['<identifier>']:
        item = session.get_item(args['<identifier>'])

    # Upload keyword arguments.
    if args['--size-hint']:
        args['--header']['x-archive-size-hint'] = args['--size-hint']
    # Upload with backups turned on by default.
    if not args['--header'].get('x-archive-keep-old-version'):
        args['--header']['x-archive-keep-old-version'] = '1'
    queue_derive = True if args['--no-derive'] is False else False
    verbose = True if args['--quiet'] is False else False

    upload_kwargs = dict(
        metadata=args['--metadata'],
        headers=args['--header'],
        debug=args['--debug'],
        queue_derive=queue_derive,
        verbose=verbose,
        verify=args['--verify'],
        checksum=args['--checksum'],
        retries=args['--retries'],
        retries_sleep=args['--sleep'],
        delete=args['--delete'],
    )

    # Upload files.
    if not args['--spreadsheet']:
        if args['-']:
            local_file = TemporaryFile()
            local_file.write(sys.stdin.read())
            local_file.seek(0)
        else:
            local_file = args['<file>']

        if isinstance(local_file, (list, tuple, set)) and args['--remote-name']:
            local_file = local_file[0]
        if args['--remote-name']:
            files = {args['--remote-name']: local_file}
        else:
            files = local_file

        for _r in _upload_files(item, files, upload_kwargs):
            if args['--debug']:
                break
            if (not _r.status_code) or (not _r.ok):
                ERRORS = True

    # Bulk upload using spreadsheet.
    else:
        # Use the same session for each upload request.
        with io.open(args['--spreadsheet'], 'rU', newline='', encoding='utf-8') as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            prev_identifier = None
            for row in spreadsheet:
                upload_kwargs_copy = deepcopy(upload_kwargs)
                local_file = row['file']
                identifier = row['identifier']
                del row['file']
                del row['identifier']
                if (not identifier) and (prev_identifier):
                    identifier = prev_identifier
                item = session.get_item(identifier)
                # TODO: Clean up how indexed metadata items are coerced
                # into metadata.
                md_args = ['{0}:{1}'.format(k.lower(), v) for (k, v) in row.items() if v]
                metadata = get_args_dict(md_args)
                upload_kwargs_copy['metadata'].update(metadata)
                r = _upload_files(item, local_file, upload_kwargs_copy,
                                  prev_identifier, session)
                for _r in r:
                    if args['--debug']:
                        break
                    if (not _r) or (not _r.ok):
                        ERRORS = True
                prev_identifier = identifier

    if ERRORS:
        sys.exit(1)
def main(argv, session):
    args = docopt(__doc__, argv=argv)
    ERRORS = False

    # Validate args.
    s = Schema({
        str: Use(bool),
        '<identifier>': Or(None, And(str, validate_s3_identifier,
            error=('<identifier> should be between 3 and 80 characters in length, and '
                   'can only contain alphanumeric characters, periods ".", '
                   'underscores "_", or dashes "-". However, <identifier> cannot begin '
                   'with periods, underscores, or dashes.'))),
        '<file>': And(
            And(lambda f: all(os.path.exists(x) for x in f if x != '-'),
                error='<file> should be a readable file or directory.'),
            And(lambda f: False if f == ['-'] and not args['--remote-name'] else True,
                error='--remote-name must be provided when uploading from stdin.')),
        '--remote-name': Or(None, str),
        '--spreadsheet': Or(None, os.path.isfile,
            error='--spreadsheet should be a readable file.'),
        '--file-metadata': Or(None, os.path.isfile,
            error='--file-metadata should be a readable file.'),
        '--metadata': Or(None, And(Use(get_args_dict), dict),
            error='--metadata must be formatted as --metadata="key:value"'),
        '--header': Or(None, And(Use(get_args_dict), dict),
            error='--header must be formatted as --header="key:value"'),
        '--retries': Use(lambda x: int(x[0]) if x else 0),
        '--sleep': Use(lambda l: int(l[0]), error='--sleep value must be an integer.'),
        '--size-hint': Or(Use(lambda l: str(l[0]) if l else None), int, None,
            error='--size-hint value must be an integer.'),
        '--status-check': bool,
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr)
        sys.exit(1)

    # Make sure the collection being uploaded to exists.
    collection_id = args['--metadata'].get('collection')
    if collection_id and not args['--no-collection-check'] and not args['--status-check']:
        if isinstance(collection_id, list):
            collection_id = collection_id[0]
        collection = session.get_item(collection_id)
        if not collection.exists:
            print('You must upload to a collection that exists. '
                  f'"{collection_id}" does not exist.\n{printable_usage(__doc__)}',
                  file=sys.stderr)
            sys.exit(1)

    # Status check.
    if args['--status-check']:
        if session.s3_is_overloaded():
            print(f'warning: {args["<identifier>"]} is over limit, and not '
                  'accepting requests. Expect 503 SlowDown errors.',
                  file=sys.stderr)
            sys.exit(1)
        else:
            print(f'success: {args["<identifier>"]} is accepting requests.',
                  file=sys.stderr)
            sys.exit()

    elif args['<identifier>']:
        item = session.get_item(args['<identifier>'])

    # Upload keyword arguments.
    if args['--size-hint']:
        args['--header']['x-archive-size-hint'] = args['--size-hint']
    # Upload with backups turned on by default.
    if not args['--header'].get('x-archive-keep-old-version') and not args['--no-backup']:
        args['--header']['x-archive-keep-old-version'] = '1'
    queue_derive = True if args['--no-derive'] is False else False
    verbose = True if args['--quiet'] is False else False

    if args['--file-metadata']:
        try:
            with open(args['--file-metadata']) as fh:
                args['<file>'] = json.load(fh)
        except JSONDecodeError:
            args['<file>'] = []
            with open(args['--file-metadata']) as fh:
                for line in fh:
                    j = json.loads(line.strip())
                    args['<file>'].append(j)

    upload_kwargs = {
        'metadata': args['--metadata'],
        'headers': args['--header'],
        'debug': args['--debug'],
        'queue_derive': queue_derive,
        'verbose': verbose,
        'verify': args['--verify'],
        'checksum': args['--checksum'],
        'retries': args['--retries'],
        'retries_sleep': args['--sleep'],
        'delete': args['--delete'],
        'validate_identifier': True,
    }

    # Upload files.
    if not args['--spreadsheet']:
        if args['-']:
            local_file = TemporaryFile()
            # sys.stdin normally has the buffer attribute which returns bytes.
            # However, this might not always be the case, e.g. on mocking for
            # test purposes. Fall back to reading as str and encoding back to
            # bytes. Note that the encoding attribute might also be None. In
            # that case, fall back to locale.getpreferredencoding, the default
            # of io.TextIOWrapper and open().
            if hasattr(sys.stdin, 'buffer'):
                def read():
                    return sys.stdin.buffer.read(1048576)
            else:
                encoding = sys.stdin.encoding or getpreferredencoding(False)

                def read():
                    return sys.stdin.read(1048576).encode(encoding)
            while True:
                data = read()
                if not data:
                    break
                local_file.write(data)
            local_file.seek(0)
        else:
            local_file = args['<file>']

        if isinstance(local_file, (list, tuple, set)) and args['--remote-name']:
            local_file = local_file[0]
        if args['--remote-name']:
            files = {args['--remote-name']: local_file}
        elif args['--keep-directories']:
            files = {f: f for f in local_file}
        else:
            files = local_file

        for _r in _upload_files(item, files, upload_kwargs):
            if args['--debug']:
                break
            if (not _r.status_code) or (not _r.ok):
                ERRORS = True
            else:
                if args['--open-after-upload']:
                    url = f'{session.protocol}//{session.host}/details/{item.identifier}'
                    webbrowser.open_new_tab(url)

    # Bulk upload using spreadsheet.
    else:
        # Use the same session for each upload request.
        with open(args['--spreadsheet'], 'r', newline='', encoding='utf-8-sig') as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            prev_identifier = None
            for row in spreadsheet:
                for metadata_key in row:
                    if not is_valid_metadata_key(metadata_key):
                        print(f'error: "{metadata_key}" is not a valid metadata key.',
                              file=sys.stderr)
                        sys.exit(1)
                upload_kwargs_copy = deepcopy(upload_kwargs)
                if row.get('REMOTE_NAME'):
                    local_file = {row['REMOTE_NAME']: row['file']}
                    del row['REMOTE_NAME']
                elif args['--keep-directories']:
                    local_file = {row['file']: row['file']}
                else:
                    local_file = row['file']
                identifier = row.get('item', row.get('identifier'))
                if not identifier:
                    if not prev_identifier:
                        print('error: no identifier column on spreadsheet.',
                              file=sys.stderr)
                        sys.exit(1)
                    identifier = prev_identifier
                del row['file']
                if 'identifier' in row:
                    del row['identifier']
                if 'item' in row:
                    del row['item']
                item = session.get_item(identifier)
                # TODO: Clean up how indexed metadata items are coerced
                # into metadata.
                md_args = [f'{k.lower()}:{v}' for (k, v) in row.items() if v]
                metadata = get_args_dict(md_args)
                upload_kwargs_copy['metadata'].update(metadata)
                r = _upload_files(item, local_file, upload_kwargs_copy,
                                  prev_identifier, session)
                for _r in r:
                    if args['--debug']:
                        break
                    if (not _r.status_code) or (not _r.ok):
                        ERRORS = True
                    else:
                        if args['--open-after-upload']:
                            url = f'{session.protocol}//{session.host}/details/{identifier}'
                            webbrowser.open_new_tab(url)
                prev_identifier = identifier

    if ERRORS:
        sys.exit(1)
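# Illustrative spreadsheet for the bulk-upload branch above. Column names
# come from the code ('identifier' or 'item', 'file', optional REMOTE_NAME,
# plus metadata columns); the rows are made up. A blank identifier reuses
# the previous row's identifier via prev_identifier.
import csv
import io

sample = ('identifier,file,REMOTE_NAME,title\n'
          'some-item,local/a.pdf,docs/a.pdf,An example title\n'
          ',local/b.pdf,docs/b.pdf,Same item as the row above\n')
rows = list(csv.DictReader(io.StringIO(sample)))
assert rows[1]['identifier'] == ''  # falls back to prev_identifier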
def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Tasks write API.
    if args['--cmd']:
        data = get_args_dict(args['--data'], query_string=True)
        task_args = get_args_dict(args['--task-args'], query_string=True)
        data['args'] = task_args
        r = session.submit_task(args['<identifier>'],
                                args['--cmd'],
                                comment=args['--comment'],
                                priority=data.get('priority'),
                                reduced_priority=args['--reduced-priority'],
                                data=data)
        j = r.json()
        if j.get('success'):
            print('success: {}'.format(j.get('value', dict()).get('log')))
            sys.exit(0)
        else:
            print('error: {}'.format(j.get('error')))
            sys.exit(1)

    # Tasks read API.
    params = get_args_dict(args['--parameter'], query_string=True)
    if args['<identifier>']:
        _params = dict(identifier=args['<identifier>'], catalog=1, history=1)
        _params.update(params)
        params = _params
    elif args['--get-task-log']:
        log = session.get_task_log(args['--get-task-log'], params)
        if six.PY2:
            print(log.encode('utf-8', errors='surrogateescape'))
        else:
            print(log.encode('utf-8', errors='surrogateescape')
                     .decode('utf-8', errors='replace'))
        sys.exit(0)

    queryable_params = [
        'identifier',
        'task_id',
        'server',
        'cmd',
        'args',
        'submitter',
        'priority',
        'wait_admin',
        'submittime',
    ]

    if not args['<identifier>'] \
            and not params.get('task_id'):
        _params = dict(catalog=1, history=0)
        _params.update(params)
        params = _params

    if not any(x in params for x in queryable_params):
        _params = dict(submitter=session.user_email, catalog=1, history=0, summary=0)
        _params.update(params)
        params = _params

    if args['--tab-output']:
        warn_msg = ('tab-delimited output will be removed in a future release. '
                    'Please switch to the default JSON output.')
        warnings.warn(warn_msg)
    for t in session.get_tasks(params=params):
        # Legacy support for tab-delimited output.
        if args['--tab-output']:
            color = t.color if t.color else 'done'
            task_args = '\t'.join(['{}={}'.format(k, v) for k, v in t.args.items()])
            output = '\t'.join([str(x) for x in [
                t.identifier,
                t.task_id,
                t.server,
                t.submittime,
                t.cmd,
                color,
                t.submitter,
                task_args,
            ] if x])
            print(output)
            sys.stdout.flush()
        else:
            print(t.json())
            sys.stdout.flush()
def main(argv, session):
    args = docopt(__doc__, argv=argv)
    ERRORS = False

    # Validate args.
    s = Schema({
        six.text_type: Use(bool),
        '<identifier>': Or(None, And(str, validate_ia_identifier,
            error=('<identifier> should be between 3 and 80 characters in length, and '
                   'can only contain alphanumeric characters, underscores ( _ ), or '
                   'dashes ( - )'))),
        '<file>': And(
            And(lambda f: all(os.path.exists(x) for x in f if x != '-'),
                error='<file> should be a readable file or directory.'),
            And(lambda f: False if f == ['-'] and not args['--remote-name'] else True,
                error='--remote-name must be provided when uploading from stdin.')),
        '--remote-name': Or(None, And(str)),
        '--spreadsheet': Or(None, os.path.isfile,
            error='--spreadsheet should be a readable file.'),
        '--metadata': Or(None, And(Use(get_args_dict), dict),
            error='--metadata must be formatted as --metadata="key:value"'),
        '--header': Or(None, And(Use(get_args_dict), dict),
            error='--header must be formatted as --header="key:value"'),
        '--retries': Use(lambda x: int(x[0]) if x else 0),
        '--sleep': Use(lambda l: int(l[0]), error='--sleep value must be an integer.'),
        '--size-hint': Or(Use(lambda l: int(l[0]) if l else None), int, None,
            error='--size-hint value must be an integer.'),
        '--status-check': bool,
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    # Status check.
    if args['--status-check']:
        if session.s3_is_overloaded():
            print('warning: {0} is over limit, and not accepting requests. '
                  'Expect 503 SlowDown errors.'.format(args['<identifier>']),
                  file=sys.stderr)
            sys.exit(1)
        else:
            print('success: {0} is accepting requests.'.format(args['<identifier>']))
            sys.exit()

    elif args['<identifier>']:
        item = session.get_item(args['<identifier>'])

    # Upload keyword arguments.
    if args['--size-hint']:
        args['--header']['x-archive-size-hint'] = args['--size-hint']
    queue_derive = True if args['--no-derive'] is False else False
    verbose = True if args['--quiet'] is False else False

    upload_kwargs = dict(
        metadata=args['--metadata'],
        headers=args['--header'],
        debug=args['--debug'],
        queue_derive=queue_derive,
        checksum=args['--checksum'],
        verbose=verbose,
        retries=args['--retries'],
        retries_sleep=args['--sleep'],
        delete=args['--delete'],
    )

    # Upload files.
    if not args['--spreadsheet']:
        if args['-']:
            local_file = TemporaryFile()
            local_file.write(sys.stdin.read())
            local_file.seek(0)
        else:
            local_file = args['<file>']

        if isinstance(local_file, (list, tuple, set)) and args['--remote-name']:
            local_file = local_file[0]
        if args['--remote-name']:
            files = {args['--remote-name']: local_file}
        else:
            files = local_file

        for _r in _upload_files(item, files, upload_kwargs):
            if args['--debug']:
                break
            if (not _r) or (not _r.ok):
                ERRORS = True

    # Bulk upload using spreadsheet.
    else:
        # Use the same session for each upload request.
        session = ArchiveSession()
        spreadsheet = csv.DictReader(open(args['--spreadsheet'], 'rU'))
        prev_identifier = None
        for row in spreadsheet:
            local_file = row['file']
            identifier = row['identifier']
            del row['file']
            del row['identifier']
            if (not identifier) and (prev_identifier):
                identifier = prev_identifier
            item = session.get_item(identifier)
            # TODO: Clean up how indexed metadata items are coerced
            # into metadata.
            md_args = ['{0}:{1}'.format(k.lower(), v) for (k, v) in row.items() if v]
            metadata = get_args_dict(md_args)
            upload_kwargs['metadata'].update(metadata)
            r = _upload_files(item, local_file, upload_kwargs, prev_identifier, session)
            for _r in r:
                if args['--debug']:
                    break
                if (not _r) or (not _r.ok):
                    ERRORS = True
            prev_identifier = identifier

    if ERRORS:
        sys.exit(1)
def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        six.text_type: Use(bool),
        '<identifier>': Or(None, And(str, validate_ia_identifier,
            error=('<identifier> should be between 3 and 80 characters in length, and '
                   'can only contain alphanumeric characters, underscores ( _ ), or '
                   'dashes ( - )'))),
        '<file>': And(
            And(lambda f: all(os.path.exists(x) for x in f if x != '-'),
                error='<file> should be a readable file or directory.'),
            And(lambda f: False if f == ['-'] and not args['--remote-name'] else True,
                error='--remote-name must be provided when uploading from stdin.')),
        '--remote-name': Or(None, And(str)),
        '--spreadsheet': Or(None, os.path.isfile,
            error='--spreadsheet should be a readable file.'),
        '--metadata': Or(None, And(Use(get_args_dict), dict),
            error='--metadata must be formatted as --metadata="key:value"'),
        '--header': Or(None, And(Use(get_args_dict), dict),
            error='--header must be formatted as --header="key:value"'),
        '--retries': Use(lambda x: int(x[0]) if x else 0),
        '--sleep': Use(lambda l: int(l[0]), error='--sleep value must be an integer.'),
        '--size-hint': Or(Use(lambda l: int(l[0]) if l else None), int, None,
            error='--size-hint value must be an integer.'),
        '--status-check': bool,
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    # Status check.
    if args['--status-check']:
        if session.s3_is_overloaded():
            print('warning: {0} is over limit, and not accepting requests. '
                  'Expect 503 SlowDown errors.'.format(args['<identifier>']),
                  file=sys.stderr)
            sys.exit(1)
        else:
            print('success: {0} is accepting requests.'.format(args['<identifier>']))
            sys.exit()

    elif args['<identifier>']:
        item = session.get_item(args['<identifier>'])

    # Upload keyword arguments.
    if args['--size-hint']:
        args['--header']['x-archive-size-hint'] = args['--size-hint']
    queue_derive = True if args['--no-derive'] is False else False
    verbose = True if args['--quiet'] is False else False

    upload_kwargs = dict(
        metadata=args['--metadata'],
        headers=args['--header'],
        debug=args['--debug'],
        queue_derive=queue_derive,
        checksum=args['--checksum'],
        verbose=verbose,
        retries=args['--retries'],
        retries_sleep=args['--sleep'],
        delete=args['--delete'],
    )

    # Upload files.
    if not args['--spreadsheet']:
        if args['-']:
            local_file = TemporaryFile()
            local_file.write(sys.stdin.read())
            local_file.seek(0)
        else:
            local_file = args['<file>']

        if isinstance(local_file, (list, tuple, set)) and args['--remote-name']:
            local_file = local_file[0]
        if args['--remote-name']:
            files = {args['--remote-name']: local_file}
        else:
            files = local_file

        responses = _upload_files(item, files, upload_kwargs)

    # Bulk upload using spreadsheet.
    else:
        # Use the same session for each upload request.
        session = ArchiveSession()
        spreadsheet = csv.DictReader(open(args['--spreadsheet'], 'rU'))
        prev_identifier = None
        responses = []
        for row in spreadsheet:
            local_file = row['file']
            identifier = row['identifier']
            del row['file']
            del row['identifier']
            if (not identifier) and (prev_identifier):
                identifier = prev_identifier
            item = session.get_item(identifier)
            # TODO: Clean up how indexed metadata items are coerced
            # into metadata.
            md_args = ['{0}:{1}'.format(k.lower(), v) for (k, v) in row.items() if v]
            metadata = get_args_dict(md_args)
            upload_kwargs['metadata'].update(metadata)
            r = _upload_files(item, local_file, upload_kwargs,
                              prev_identifier, session, responses)
            responses += r
            prev_identifier = identifier

    if responses and not all(r and r.ok for r in responses):
        sys.exit(1)
def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        six.text_type: bool,
        '<identifier>': list,
        '--modify': list,
        '--append': list,
        '--spreadsheet': Or(None, And(lambda f: os.path.exists(f),
                            error='<file> should be a readable file or directory.')),
        '--target': Or(None, str),
        '--priority': Or(None, Use(int, error='<priority> should be an integer.')),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    formats = set()
    responses = []

    for i, identifier in enumerate(args['<identifier>']):
        item = session.get_item(identifier)

        # Check existence of item.
        if args['--exists']:
            if item.exists:
                responses.append(True)
                print('{0} exists'.format(identifier))
            else:
                responses.append(False)
                print('{0} does not exist'.format(identifier), file=sys.stderr)
            if (i + 1) == len(args['<identifier>']):
                if all(r is True for r in responses):
                    sys.exit(0)
                else:
                    sys.exit(1)

        # Modify metadata.
        elif args['--modify'] or args['--append']:
            metadata_args = args['--modify'] if args['--modify'] else args['--append']
            metadata = get_args_dict(metadata_args)
            responses.append(modify_metadata(item, metadata, args))
            if (i + 1) == len(args['<identifier>']):
                if all(r.status_code == 200 for r in responses):
                    sys.exit(0)
                else:
                    sys.exit(1)

        # Get metadata.
        elif args['--formats']:
            for f in item.get_files():
                formats.add(f.format)
            if (i + 1) == len(args['<identifier>']):
                print('\n'.join(formats))

        # Dump JSON to stdout.
        else:
            metadata = json.dumps(item.item_metadata)
            print(metadata)

    # Edit metadata for items in bulk, using a spreadsheet as input.
    if args['--spreadsheet']:
        if not args['--priority']:
            args['--priority'] = -5
        spreadsheet = csv.DictReader(open(args['--spreadsheet'], 'rU'))
        responses = []
        for row in spreadsheet:
            if not row['identifier']:
                continue
            item = session.get_item(row['identifier'])
            if row.get('file'):
                del row['file']
            metadata = dict((k.lower(), v) for (k, v) in row.items() if v)
            responses.append(modify_metadata(item, metadata, args))

        if all(r.status_code == 200 for r in responses):
            sys.exit(0)
        else:
            sys.exit(1)

    sys.exit(0)
def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Tasks write API.
    if args['--cmd']:
        if args['--get-rate-limit']:
            r = session.get_tasks_api_rate_limit(args['--cmd'])
            print(json.dumps(r))
            sys.exit(0)
        data = get_args_dict(args['--data'], query_string=True)
        task_args = get_args_dict(args['--task-args'], query_string=True)
        data['args'] = task_args
        r = session.submit_task(args['<identifier>'],
                                args['--cmd'],
                                comment=args['--comment'],
                                priority=data.get('priority'),
                                reduced_priority=args['--reduced-priority'],
                                data=data)
        j = r.json()
        if j.get('success'):
            task_log_url = j.get('value', {}).get('log')
            print(f'success: {task_log_url}', file=sys.stderr)
            sys.exit(0)
        elif 'already queued/running' in j.get('error', ''):
            print(f'success: {args["--cmd"]} task already queued/running',
                  file=sys.stderr)
            sys.exit(0)
        else:
            print(f'error: {j.get("error")}', file=sys.stderr)
            sys.exit(1)

    # Tasks read API.
    params = get_args_dict(args['--parameter'], query_string=True)
    if args['<identifier>']:
        _params = {'identifier': args['<identifier>'], 'catalog': 1, 'history': 1}
        _params.update(params)
        params = _params
    elif args['--get-task-log']:
        log = session.get_task_log(args['--get-task-log'], params)
        print(log.encode('utf-8', errors='surrogateescape')
                 .decode('utf-8', errors='replace'))
        sys.exit(0)

    queryable_params = [
        'identifier',
        'task_id',
        'server',
        'cmd',
        'args',
        'submitter',
        'priority',
        'wait_admin',
        'submittime',
    ]

    if not (args['<identifier>'] or params.get('task_id')):
        _params = {'catalog': 1, 'history': 0}
        _params.update(params)
        params = _params

    if not any(x in params for x in queryable_params):
        _params = {'submitter': session.user_email, 'catalog': 1, 'history': 0,
                   'summary': 0}
        _params.update(params)
        params = _params

    if args['--tab-output']:
        warn_msg = ('tab-delimited output will be removed in a future release. '
                    'Please switch to the default JSON output.')
        warnings.warn(warn_msg)
    for t in session.get_tasks(params=params):
        # Legacy support for tab-delimited output.
        if args['--tab-output']:
            color = t.color if t.color else 'done'
            task_args = '\t'.join([f'{k}={v}' for k, v in t.args.items()])
            output = '\t'.join([str(x) for x in [
                t.identifier,
                t.task_id,
                t.server,
                t.submittime,
                t.cmd,
                color,
                t.submitter,
                task_args,
            ] if x])
            print(output)
            sys.stdout.flush()
        else:
            print(t.json())
            sys.stdout.flush()
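# Write-API sketch grounded in the snippet above: submit_task posts --cmd
# for an identifier. The identifier and comment are made up, and
# 'derive.php' is used purely as an illustrative task command.
from internetarchive import get_session

session = get_session()
r = session.submit_task('some-item', 'derive.php',
                        comment='re-derive after replacing files',
                        data={'args': {}})
print(r.json())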
def main(argv, session=None):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        str: Use(bool),
        '<query>': Use(lambda x: ' '.join(x)),
        '--parameters': Use(lambda x: get_args_dict(x, query_string=True)),
        '--header': Or(None, And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--sort': list,
        '--field': list,
        '--timeout': Use(lambda x: float(x[0]),
                         error='--timeout must be integer or float.')
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr)
        sys.exit(1)

    # Support comma separated values.
    fields = list(chain.from_iterable([x.split(',') for x in args['--field']]))
    sorts = list(chain.from_iterable([x.split(',') for x in args['--sort']]))

    r_kwargs = {
        'headers': args['--header'],
        'timeout': args['--timeout'],
    }

    search = session.search_items(args['<query>'],
                                  fields=fields,
                                  sorts=sorts,
                                  params=args['--parameters'],
                                  full_text_search=args['--fts'],
                                  dsl_fts=args['--dsl-fts'],
                                  request_kwargs=r_kwargs)

    try:
        if args['--num-found']:
            print(search.num_found)
            sys.exit(0)
        for result in search:
            if args['--itemlist']:
                print(result.get('identifier', ''))
            else:
                j = json.dumps(result)
                print(j)
            if result.get('error'):
                sys.exit(1)
    except ValueError as e:
        print(f'error: {e}', file=sys.stderr)
    except ConnectTimeout as exc:
        print('error: Request timed out. Increase the --timeout and try again.',
              file=sys.stderr)
        sys.exit(1)
    except ReadTimeout as exc:
        print('error: The server timed out and failed to return all search results, '
              'please try again', file=sys.stderr)
        sys.exit(1)
    except AuthenticationError as exc:
        print(f'error: {exc}', file=sys.stderr)
        sys.exit(1)
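# Usage sketch for the search entry point above. The query and flags are
# illustrative; session comes from the library's config-aware factory, and
# main() relies on its module's docopt usage string for defaults.
from internetarchive import get_session

session = get_session()
main(['search', 'collection:nasa', '--itemlist'], session=session)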
def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Validation error messages.
    destdir_msg = '--destdir must be a valid path to a directory.'
    itemlist_msg = '--itemlist must be a valid path to an existing file.'

    # Validate args.
    s = Schema({
        str: Use(bool),
        '--destdir': Or([], And(Use(lambda d: d[0]), dir_exists), error=destdir_msg),
        '--format': list,
        '--glob': Use(lambda l: l[0] if l else None),
        '<file>': list,
        '--search': Or(str, None),
        '--itemlist': Or(None, And(lambda f: os.path.isfile(f)), error=itemlist_msg),
        '<identifier>': Or(str, None),
        '--retries': Use(lambda x: x[0]),
        '--search-parameters': Use(lambda x: get_args_dict(x, query_string=True)),
        '--on-the-fly': Use(bool),
        '--no-change-timestamp': Use(bool),
        '--parameters': Use(lambda x: get_args_dict(x, query_string=True)),
    })

    # Filenames should be unicode literals. Support PY2 and PY3.
    if six.PY2:
        args['<file>'] = [f.decode('utf-8') for f in args['<file>']]

    try:
        args = s.validate(args)
        if args['--glob']:
            if args['--format']:
                raise SchemaError(None, '--glob and --format cannot be used together.')
    except SchemaError as exc:
        sys.stderr.write('{0}\n{1}\n'.format(str(exc), printable_usage(__doc__)))
        sys.exit(1)

    retries = int(args['--retries'])

    if args['--itemlist']:
        with open(args['--itemlist']) as fp:
            ids = [x.strip() for x in fp]
        total_ids = len(ids)
    elif args['--search']:
        try:
            _search = session.search_items(args['--search'],
                                           params=args['--search-parameters'])
            total_ids = _search.num_found
            if total_ids == 0:
                print('error: the query "{0}" '
                      'returned no results'.format(args['--search']),
                      file=sys.stderr)
                sys.exit(1)
            ids = _search
        except ValueError as e:
            print('error: {0}'.format(e), file=sys.stderr)
            sys.exit(1)

    # Download specific files.
    if args['<identifier>'] and args['<identifier>'] != '-':
        if '/' in args['<identifier>']:
            identifier = args['<identifier>'].split('/')[0]
            files = ['/'.join(args['<identifier>'].split('/')[1:])]
        else:
            identifier = args['<identifier>']
            files = args['<file>']
        total_ids = 1
        ids = [identifier]
    elif args['<identifier>'] == '-':
        total_ids = 1
        ids = sys.stdin
        files = None
    else:
        files = None

    errors = list()
    for i, identifier in enumerate(ids):
        if args['--stdout']:
            item = session.get_item(identifier)
            f = list(item.get_files(args['<file>']))
            try:
                assert len(f) == 1
            except AssertionError:
                sys.stderr.write('error: {0}/{1} does not exist!\n'.format(
                    identifier, args['<file>'][0]))
                sys.exit(1)
            if six.PY2:
                stdout_buf = sys.stdout
            else:
                stdout_buf = sys.stdout.buffer
            f[0].download(retries=args['--retries'],
                          fileobj=stdout_buf,
                          params=args['--parameters'])
            sys.exit(0)

        try:
            identifier = identifier.strip()
        except AttributeError:
            identifier = identifier.get('identifier')

        if total_ids > 1:
            item_index = '{0}/{1}'.format((i + 1), total_ids)
        else:
            item_index = None

        try:
            item = session.get_item(identifier)
        except Exception as exc:
            print('{0}: failed to retrieve item metadata - errors'.format(identifier),
                  file=sys.stderr)
            if 'You are attempting to make an HTTPS' in str(exc):
                print('\n{0}'.format(exc), file=sys.stderr)
                sys.exit(1)
            else:
                continue

        # Otherwise, download the entire item.
        _errors = item.download(
            files=files,
            formats=args['--format'],
            glob_pattern=args['--glob'],
            dry_run=args['--dry-run'],
            verbose=args['--verbose'],
            silent=args['--silent'],
            ignore_existing=args['--ignore-existing'],
            checksum=args['--checksum'],
            destdir=args['--destdir'],
            no_directory=args['--no-directories'],
            retries=retries,
            item_index=item_index,
            ignore_errors=True,
            on_the_fly=args['--on-the-fly'],
            no_change_timestamp=args['--no-change-timestamp'],
            params=args['--parameters'],
        )
        if _errors:
            errors.append(_errors)

    if errors:
        # TODO: add option for a summary/report.
        sys.exit(1)
    else:
        sys.exit(0)
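# A minimal sketch (not part of the CLI module above) of the same per-item
# download via the public `internetarchive.download()` helper; the identifier
# and glob pattern are hypothetical examples.
def _example_download():
    from internetarchive import download

    # Fetch only JPEGs from one item, skipping files that already exist
    # locally -- roughly the --glob / --ignore-existing path above.
    download('nasa', glob_pattern='*.jpg', ignore_existing=True, retries=3)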
def main(argv: dict, session: session.ArchiveSession) -> None:
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        str: bool,
        '<identifier>': list,
        '--modify': list,
        '--header': Or(None, And(Use(get_args_header_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--append': list,
        '--append-list': list,
        '--remove': list,
        '--spreadsheet': Or(None,
                            And(lambda f: os.path.exists(f),
                                error='--spreadsheet should be a readable file.')),
        '--target': Or(None, str),
        '--priority': Or(None, Use(int, error='<priority> should be an integer.')),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr)
        sys.exit(1)

    formats = set()
    responses: list[bool | Response] = []

    for i, identifier in enumerate(args['<identifier>']):
        item = session.get_item(identifier)

        # Check existence of item.
        if args['--exists']:
            if item.exists:
                responses.append(True)
                print(f'{identifier} exists', file=sys.stderr)
            else:
                responses.append(False)
                print(f'{identifier} does not exist', file=sys.stderr)
            if (i + 1) == len(args['<identifier>']):
                if all(r is True for r in responses):
                    sys.exit(0)
                else:
                    sys.exit(1)

        # Modify metadata.
        elif (args['--modify'] or args['--append'] or args['--append-list']
              or args['--remove']):
            if args['--modify']:
                metadata_args = args['--modify']
            elif args['--append']:
                metadata_args = args['--append']
            elif args['--append-list']:
                metadata_args = args['--append-list']
            if args['--remove']:
                metadata_args = args['--remove']
            try:
                metadata = get_args_dict(metadata_args)
                if any('/' in k for k in metadata):
                    metadata = get_args_dict_many_write(metadata)
            except ValueError:
                print('error: The value of --modify, --remove, --append or --append-list '
                      'is invalid. It must be formatted as: --modify=key:value',
                      file=sys.stderr)
                sys.exit(1)

            if args['--remove']:
                responses.append(remove_metadata(item, metadata, args))
            else:
                responses.append(modify_metadata(item, metadata, args))
            if (i + 1) == len(args['<identifier>']):
                if all(r.status_code == 200 for r in responses):  # type: ignore
                    sys.exit(0)
                else:
                    for r in responses:
                        assert isinstance(r, Response)
                        if r.status_code == 200:
                            continue
                        # We still want to exit 0 if the non-200 is a
                        # "no changes to xml" error.
                        elif 'no changes' in r.text:
                            continue
                        else:
                            sys.exit(1)

        # Get metadata.
        elif args['--formats']:
            for f in item.get_files():
                formats.add(f.format)
            if (i + 1) == len(args['<identifier>']):
                print('\n'.join(formats))

        # Dump JSON to stdout.
        else:
            metadata_str = json.dumps(item.item_metadata)
            print(metadata_str)

    # Edit metadata for items in bulk, using a spreadsheet as input.
    if args['--spreadsheet']:
        if not args['--priority']:
            args['--priority'] = -5
        with open(args['--spreadsheet'], newline='', encoding='utf-8') as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            responses = []
            for row in spreadsheet:
                if not row['identifier']:
                    continue
                item = session.get_item(row['identifier'])
                if row.get('file'):
                    del row['file']
                metadata = {k.lower(): v for k, v in row.items() if v}
                responses.append(modify_metadata(item, metadata, args))

            if all(r.status_code == 200 for r in responses):  # type: ignore
                sys.exit(0)
            else:
                for r in responses:
                    assert isinstance(r, Response)
                    if r.status_code == 200:
                        continue
                    # We still want to exit 0 if the non-200 is a
                    # "no changes to xml" error.
                    elif 'no changes' in r.text:
                        continue
                    else:
                        sys.exit(1)
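# A minimal sketch (not part of the CLI module above) of the same metadata
# write via the public `internetarchive.modify_metadata()` helper; the
# identifier and title are hypothetical examples.
def _example_modify_metadata():
    from internetarchive import modify_metadata

    # A 200 response, or a non-200 whose body contains "no changes", both
    # count as success in the CLI logic above.
    r = modify_metadata('my-test-item', metadata={'title': 'A New Title'})
    return r.status_code == 200 or 'no changes' in r.text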