Example #1
def test_ia_search_itemlist(capsys):
    test_scrape_response = load_test_data_file('scrape_response.json')

    with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
        url1 = (f'{PROTOCOL}//archive.org/services/search/v1/scrape'
                '?q=collection%3Aattentionkmartshoppers'
                '&count=10000')
        url2 = (f'{PROTOCOL}//archive.org/services/search/v1/scrape?'
                'cursor=W3siaWRlbnRpZmllciI6IjE5NjEtTC0wNTkxNCJ9XQ%3D%3D'
                '&q=collection%3Aattentionkmartshoppers'
                '&count=10000')
        rsps.add(responses.POST,
                 url1,
                 body=test_scrape_response,
                 match_querystring=True)
        _j = json.loads(test_scrape_response)
        del _j['cursor']
        _r = json.dumps(_j)
        rsps.add(responses.POST, url2, body=_r, match_querystring=True)
        ia_call([
            'ia', 'search', 'collection:attentionkmartshoppers', '--itemlist'
        ])

    out, err = capsys.readouterr()
    assert len(set(out.split())) == 100
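The test above exercises the Scrape API's cursor-based paging: the first mocked response carries a cursor, and the second (cursor removed) ends the walk. Below is a minimal sketch of that paging loop against the live endpoint; the items/cursor fields are assumed to match the mocked scrape_response.json, and the query is illustrative rather than part of the original tests.

import requests

def scrape_identifiers(query, count=10000):
    # Follow the Scrape API cursor until the last page is reached.
    url = 'https://archive.org/services/search/v1/scrape'
    params = {'q': query, 'count': count}
    while True:
        j = requests.post(url, params=params).json()
        for doc in j.get('items', []):
            yield doc.get('identifier')
        cursor = j.get('cursor')
        if not cursor:
            break
        params['cursor'] = cursor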
Example #2
def insert_test_txt(body):
    body = json.loads(body)
    body['files'].append({
        'name': 'test.txt',
        'md5': 'acbd18db4cc2f85cedef654fccc4a4d8'
    })
    return json.dumps(body)
Example #3
def review(self, title, body, stars=None):
    u = f'{self.session.protocol}//{self.session.host}/services/reviews.php'
    p = {'identifier': self.identifier}
    d = {'title': title, 'body': body}
    if stars:
        d['stars'] = stars
    # Reviews are authenticated with the account's IA-S3 keys.
    a = S3Auth(self.session.access_key, self.session.secret_key)
    r = self.session.post(u, params=p, data=json.dumps(d), auth=a)
    r.raise_for_status()
    return r
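A hedged usage sketch for the method above (it appears to be Item.review from the internetarchive package); the identifier, review text, and star rating are illustrative, and configured IA-S3 credentials are assumed:

from internetarchive import get_item

item = get_item('nasa')  # illustrative identifier
r = item.review('Great collection', 'Lots of useful scans.', stars=5)
print(r.status_code)  # raise_for_status() has already run, so this is 2xx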
Example #4
def __hash__(self):
    without_excluded_keys = {
        k: v
        for k, v in self.item_metadata.items()
        if k not in self.EXCLUDED_ITEM_METADATA_KEYS
    }
    return hash(
        json.dumps(without_excluded_keys,
                   sort_keys=True,
                   check_circular=False))
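The hash above is computed from the JSON-serialized metadata with volatile keys excluded, so objects that differ only in those keys hash identically. A self-contained sketch of the same pattern; the Record class and its excluded keys are hypothetical stand-ins, not the library's own:

import json

class Record:
    EXCLUDED_ITEM_METADATA_KEYS = ('created', 'updated')  # illustrative only

    def __init__(self, item_metadata):
        self.item_metadata = item_metadata

    def __hash__(self):
        without_excluded_keys = {
            k: v
            for k, v in self.item_metadata.items()
            if k not in self.EXCLUDED_ITEM_METADATA_KEYS
        }
        return hash(json.dumps(without_excluded_keys,
                               sort_keys=True,
                               check_circular=False))

a = Record({'title': 'NASA Images', 'created': 1})
b = Record({'title': 'NASA Images', 'created': 2})
assert hash(a) == hash(b)  # the excluded key does not affect the hash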
Example #5
    def remove_from_simplelist(self, parent, list):
        """Remove item from a simplelist.

        :rtype: :class:`requests.Response`
        """
        patch = {
            'op': 'delete',
            'parent': parent,
            'list': list,
        }
        data = {
            '-patch': json.dumps(patch),
            '-target': 'simplelists',
        }
        r = self.session.post(self.urls.metadata, data=data)
        return r
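Assuming the method above lives on Item (self.urls.metadata points at the item's Metadata API endpoint), a call might look like the following; both identifiers and the list name are made up for illustration:

from internetarchive import get_item

item = get_item('some-member-item')  # hypothetical identifier
# Hypothetical parent item and simplelist name.
r = item.remove_from_simplelist('some-parent-item', 'holdings')
print(r.status_code)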
Example #6
def test_download_dark_item(tmpdir, capsys, nasa_metadata, session):
    tmpdir.chdir()
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        nasa_metadata['metadata']['identifier'] = 'dark-item'
        nasa_metadata['is_dark'] = True
        _item_metadata = json.dumps(nasa_metadata)
        rsps.add(responses.GET,
                 f'{PROTOCOL}//archive.org/metadata/dark-item',
                 body=_item_metadata,
                 content_type='application/json')
        _item = session.get_item('dark-item')
        rsps.add(responses.GET,
                 DOWNLOAD_URL_RE,
                 body='no dest dir',
                 status=403,
                 adding_headers={'content-length': '100'})
        _item.download(files='nasa_meta.xml', verbose=True)
        out, err = capsys.readouterr()
        assert 'skipping dark-item, item is dark' in err
Example #7
def _prepare_metadata_headers(prepared_metadata, meta_type='meta'):
    headers = {}
    for meta_key, meta_value in prepared_metadata.items():
        # Encode dict values as JSON strings because Archive.org does not
        # yet support complex metadata structures in
        # <identifier>_meta.xml.
        if isinstance(meta_value, dict):
            meta_value = json.dumps(meta_value)
        # Convert the metadata value into a list if it is not already
        # iterable.
        if isinstance(meta_value, str) or not hasattr(meta_value, '__iter__'):
            meta_value = [meta_value]
        # Convert metadata items into HTTP headers and add to
        # ``headers`` dict.
        for i, value in enumerate(meta_value):
            if not value:
                continue
            header_key = f'x-archive-{meta_type}{i:02d}-{meta_key}'
            if isinstance(value, str) and needs_quote(value):
                value = f'uri({quote(value)})'
            # Because RFC 822 HTTP headers disallow _ in names, IA-S3 will
            # translate two hyphens in a row (--) back into an underscore (_).
            header_key = header_key.replace('_', '--')
            headers[header_key] = value
    return headers
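A hypothetical invocation of the helper above, showing how metadata keys map onto x-archive-meta* headers. Single-word values are used so the needs_quote/quote branch stays out of the way; the expected dict in the comment is an assumption, not captured output:

headers = _prepare_metadata_headers({'mediatype': 'texts',
                                     'subject': ['solar', 'apollo']})
# Roughly: {'x-archive-meta00-mediatype': 'texts',
#           'x-archive-meta00-subject': 'solar',
#           'x-archive-meta01-subject': 'apollo'}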
Example #8
def test_ia_upload_status_check(capsys):
    with IaRequestsMock() as rsps:
        rsps.add(responses.GET,
                 f'{PROTOCOL}//s3.us.archive.org',
                 body=STATUS_CHECK_RESPONSE,
                 content_type='application/json')

        ia_call(['ia', 'upload', 'nasa', '--status-check'])
        out, err = capsys.readouterr()
        assert 'success: nasa is accepting requests.' in err

        j = json.loads(STATUS_CHECK_RESPONSE)
        j['over_limit'] = 1
        rsps.reset()
        rsps.add(responses.GET,
                 f'{PROTOCOL}//s3.us.archive.org',
                 body=json.dumps(j),
                 content_type='application/json')

        ia_call(['ia', 'upload', 'nasa', '--status-check'],
                expected_exit_code=1)
        out, err = capsys.readouterr()
        assert ('warning: nasa is over limit, and not accepting requests. '
                'Expect 503 SlowDown errors.') in err
Example #9
def main(argv, session=None):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        str: Use(bool),
        '<query>': Use(lambda x: ' '.join(x)),
        '--parameters': Use(lambda x: get_args_dict(x, query_string=True)),
        '--header': Or(None,
                       And(Use(get_args_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--sort': list,
        '--field': list,
        '--timeout': Use(lambda x: float(x[0]),
                         error='--timeout must be integer or float.'),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr)
        sys.exit(1)

    # Support comma separated values.
    fields = list(chain.from_iterable([x.split(',') for x in args['--field']]))
    sorts = list(chain.from_iterable([x.split(',') for x in args['--sort']]))

    r_kwargs = {
        'headers': args['--header'],
        'timeout': args['--timeout'],
    }

    search = session.search_items(args['<query>'],
                                  fields=fields,
                                  sorts=sorts,
                                  params=args['--parameters'],
                                  full_text_search=args['--fts'],
                                  dsl_fts=args['--dsl-fts'],
                                  request_kwargs=r_kwargs)

    try:
        if args['--num-found']:
            print(search.num_found)
            sys.exit(0)

        for result in search:
            if args['--itemlist']:
                print(result.get('identifier', ''))
            else:
                j = json.dumps(result)
                print(j)
                if result.get('error'):
                    sys.exit(1)
    except ValueError as e:
        print(f'error: {e}', file=sys.stderr)
    except ConnectTimeout as exc:
        print(
            'error: Request timed out. Increase the --timeout and try again.',
            file=sys.stderr)
        sys.exit(1)
    except ReadTimeout as exc:
        print(
            'error: The server timed out and failed to return all search results,'
            ' please try again',
            file=sys.stderr)
        sys.exit(1)
    except AuthenticationError as exc:
        print(f'error: {exc}', file=sys.stderr)
        sys.exit(1)
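The CLI above is a thin wrapper around ArchiveSession.search_items. A hedged sketch of the programmatic equivalent of `ia search <query> --itemlist`; the query is illustrative:

from internetarchive import get_session

session = get_session()
for result in session.search_items('collection:nasa', fields=['identifier']):
    print(result.get('identifier', ''))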
Example #10
def json(self):
    return json.dumps(self.task_dict)
Example #11
def test_modify_metadata(nasa_item, nasa_metadata):
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.POST, f'{PROTOCOL}//archive.org/metadata/nasa')

        # Test simple add.
        md = {'foo': 'bar'}
        p = nasa_item.modify_metadata(md, debug=True)
        _patch = json.dumps([
            {
                'add': '/foo',
                'value': 'bar'
            },
        ])
        expected_data = {
            'priority': -5,
            '-target': 'metadata',
            '-patch': _patch,
        }
        assert set(p.data.keys()) == set(expected_data.keys())
        assert p.data['priority'] == expected_data['priority']
        assert p.data['-target'] == expected_data['-target']
        assert all(v in p.data['-patch'] for v in ['/foo', 'bar'])
        # Test no changes.
        md = {'title': 'NASA Images'}
        p = nasa_item.modify_metadata(md, debug=True)
        expected_data = {'priority': -5, '-target': 'metadata', '-patch': '[]'}
        assert p.data == expected_data

        md = {'title': 'REMOVE_TAG'}
        p = nasa_item.modify_metadata(md, debug=True)
        expected_data = {
            'priority': -5,
            '-target': 'metadata',
            '-patch': json.dumps([{
                'remove': '/title'
            }])
        }
        assert set(p.data.keys()) == set(expected_data.keys())
        assert p.data['priority'] == expected_data['priority']
        assert p.data['-target'] == expected_data['-target']
        assert '/title' in str(p.data['-patch'])
        assert 'remove' in str(p.data['-patch'])

        # Test add array.
        md = {'subject': ['one', 'two', 'last']}
        p = nasa_item.modify_metadata(md, debug=True, priority=-1)
        expected_data = {
            'priority': -1,
            '-target': 'metadata',
            '-patch': json.dumps([{
                'add': '/subject',
                'value': ['one', 'two', 'last']
            }])
        }
        assert set(p.data.keys()) == set(expected_data.keys())
        assert p.data['priority'] == expected_data['priority']
        assert p.data['-target'] == expected_data['-target']
        assert '["one", "two", "last"]' in str(p.data['-patch']) \
               or '["one","two","last"]' in str(p.data['-patch'])

        # Test indexed mod.
        nasa_item.item_metadata['metadata']['subject'] = [
            'first', 'middle', 'last'
        ]
        md = {'subject[2]': 'new first'}
        p = nasa_item.modify_metadata(md, debug=True)
        expected_data = {
            'priority': -5,
            '-target': 'metadata',
            '-patch': json.dumps([{
                'value': 'new first',
                'replace': '/subject/2'
            }])
        }

        # Avoid comparing the json strings, because they are not in a canonical form
        assert set(p.data.keys()) == set(expected_data.keys())
        assert all(p.data[k] == expected_data[k]
                   for k in ['priority', '-target'])
        assert ('/subject/2' in p.data['-patch']
                or r'\/subject\/2' in p.data['-patch'])

        # Test priority.
        md = {'title': 'NASA Images'}
        p = nasa_item.modify_metadata(md, priority=3, debug=True)
        expected_data = {'priority': 3, '-target': 'metadata', '-patch': '[]'}
        assert p.data == expected_data

        # Test auth.
        md = {'title': 'NASA Images'}
        p = nasa_item.modify_metadata(md,
                                      access_key='a',
                                      secret_key='b',
                                      debug=True)
        assert 'access=a' in p.body
        assert 'secret=b' in p.body

        # Test change.
        md = {'title': 'new title'}
        nasa_metadata['metadata']['title'] = 'new title'
        _item_metadata = json.dumps(nasa_metadata)
        rsps.add(responses.GET,
                 f'{PROTOCOL}//archive.org/metadata/nasa',
                 body=_item_metadata)
        nasa_item.modify_metadata(md, access_key='a', secret_key='b')
        # Test that item re-initializes
        assert nasa_item.metadata['title'] == 'new title'
Example #12
def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Tasks write API.
    if args['--cmd']:
        if args['--get-rate-limit']:
            r = session.get_tasks_api_rate_limit(args['--cmd'])
            print(json.dumps(r))
            sys.exit(0)
        data = get_args_dict(args['--data'], query_string=True)
        task_args = get_args_dict(args['--task-args'], query_string=True)
        data['args'] = task_args
        r = session.submit_task(args['<identifier>'],
                                args['--cmd'],
                                comment=args['--comment'],
                                priority=data.get('priority'),
                                reduced_priority=args['--reduced-priority'],
                                data=data)
        j = r.json()
        if j.get('success'):
            task_log_url = j.get('value', {}).get('log')
            print(f'success: {task_log_url}', file=sys.stderr)
            sys.exit(0)
        elif 'already queued/running' in j.get('error', ''):
            print(f'success: {args["--cmd"]} task already queued/running', file=sys.stderr)
            sys.exit(0)
        else:
            print(f'error: {j.get("error")}', file=sys.stderr)
            sys.exit(1)

    # Tasks read API.
    params = get_args_dict(args['--parameter'], query_string=True)
    if args['<identifier>']:
        _params = {'identifier': args['<identifier>'], 'catalog': 1, 'history': 1}
        _params.update(params)
        params = _params
    elif args['--get-task-log']:
        log = session.get_task_log(args['--get-task-log'], params)
        print(log.encode('utf-8', errors='surrogateescape')
                 .decode('utf-8', errors='replace'))
        sys.exit(0)

    queryable_params = [
        'identifier',
        'task_id',
        'server',
        'cmd',
        'args',
        'submitter',
        'priority',
        'wait_admin',
        'submittime',
    ]

    if not (args['<identifier>']
            or params.get('task_id')):
        _params = {'catalog': 1, 'history': 0}
        _params.update(params)
        params = _params

    if not any(x in params for x in queryable_params):
        _params = {'submitter': session.user_email, 'catalog': 1, 'history': 0, 'summary': 0}
        _params.update(params)
        params = _params

    if args['--tab-output']:
        warn_msg = ('tab-delimited output will be removed in a future release. '
                    'Please switch to the default JSON output.')
        warnings.warn(warn_msg)
    for t in session.get_tasks(params=params):
        # Legacy support for tab-delimited output.
        if args['--tab-output']:
            color = t.color if t.color else 'done'
            task_args = '\t'.join([f'{k}={v}' for k, v in t.args.items()])
            output = '\t'.join([str(x) for x in [
                t.identifier,
                t.task_id,
                t.server,
                t.submittime,
                t.cmd,
                color,
                t.submitter,
                task_args,
            ] if x])
            print(output)
            sys.stdout.flush()
        else:
            print(t.json())
            sys.stdout.flush()
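For the read path above, a minimal programmatic sketch; the params mirror the catalog-only defaults applied by the CLI, and get_tasks is assumed to yield the same task objects whose json() method appears in Example #10:

from internetarchive import get_session

session = get_session()
for task in session.get_tasks(params={'catalog': 1, 'history': 0}):
    print(task.json())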
Example #13
    def prepare_body(self, metadata, source_metadata, target, priority, append,
                     append_list):
        priority = -5 if not priority else priority

        if not source_metadata:
            r = requests.get(self.url)
            source_metadata = r.json()

        # Write to many targets
        if (isinstance(metadata, list) or any('/' in k for k in metadata)
                or all(isinstance(k, dict) for k in metadata.values())):
            changes = []

            if any(not k for k in metadata):
                raise ValueError('Invalid metadata provided, '
                                 'check your input and try again')

            if target:
                metadata = {target: metadata}
            for key in metadata:
                if key == 'metadata':
                    try:
                        patch = prepare_patch(metadata[key],
                                              source_metadata['metadata'],
                                              append, append_list)
                    except KeyError:
                        raise ItemLocateError
                elif key.startswith('files'):
                    patch = prepare_files_patch(metadata[key],
                                                source_metadata['files'],
                                                append, key, append_list)
                else:
                    key = key.split('/')[0]
                    patch = prepare_target_patch(metadata, source_metadata,
                                                 append, target, append_list,
                                                 key)
                changes.append({'target': key, 'patch': patch})
            self.data = {
                '-changes': json.dumps(changes),
                'priority': priority,
            }
            logger.debug(f'submitting metadata request: {self.data}')
        # Write to single target
        else:
            if not target or 'metadata' in target:
                target = 'metadata'
                try:
                    patch = prepare_patch(metadata,
                                          source_metadata['metadata'], append,
                                          append_list)
                except KeyError:
                    raise ItemLocateError
            elif 'files' in target:
                patch = prepare_files_patch(metadata, source_metadata['files'],
                                            append, target, append_list)
            else:
                metadata = {target: metadata}
                patch = prepare_target_patch(metadata, source_metadata, append,
                                             target, append_list, target)
            self.data = {
                '-patch': json.dumps(patch),
                '-target': target,
                'priority': priority,
            }
            logger.debug(f'submitting metadata request: {self.data}')
        super().prepare_body(self.data, None)
Example #14
def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        str: bool,
        '<identifier>': list,
        '--modify': list,
        '--header': Or(None, And(Use(get_args_header_dict), dict),
                       error='--header must be formatted as --header="key:value"'),
        '--append': list,
        '--append-list': list,
        '--remove': list,
        '--spreadsheet': Or(None, And(lambda f: os.path.exists(f),
                                      error='<file> should be a readable file or directory.')),
        '--target': Or(None, str),
        '--priority': Or(None, Use(int, error='<priority> should be an integer.')),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr)
        sys.exit(1)

    formats = set()
    responses = []

    for i, identifier in enumerate(args['<identifier>']):
        item = session.get_item(identifier)

        # Check existence of item.
        if args['--exists']:
            if item.exists:
                responses.append(True)
                print(f'{identifier} exists', file=sys.stderr)
            else:
                responses.append(False)
                print(f'{identifier} does not exist', file=sys.stderr)
            if (i + 1) == len(args['<identifier>']):
                if all(r is True for r in responses):
                    sys.exit(0)
                else:
                    sys.exit(1)

        # Modify metadata.
        elif (args['--modify'] or args['--append'] or args['--append-list']
              or args['--remove']):
            if args['--modify']:
                metadata_args = args['--modify']
            elif args['--append']:
                metadata_args = args['--append']
            elif args['--append-list']:
                metadata_args = args['--append-list']
            if args['--remove']:
                metadata_args = args['--remove']
            try:
                metadata = get_args_dict(metadata_args)
                if any('/' in k for k in metadata):
                    metadata = get_args_dict_many_write(metadata)
            except ValueError:
                print('error: The value of --modify, --remove, --append or --append-list '
                      'is invalid. It must be formatted as: --modify=key:value',
                      file=sys.stderr)
                sys.exit(1)

            if args['--remove']:
                responses.append(remove_metadata(item, metadata, args))
            else:
                responses.append(modify_metadata(item, metadata, args))
            if (i + 1) == len(args['<identifier>']):
                if all(r.status_code == 200 for r in responses):
                    sys.exit(0)
                else:
                    for r in responses:
                        if r.status_code == 200:
                            continue
                        # We still want to exit 0 if the non-200 is a
                        # "no changes to xml" error.
                        elif 'no changes' in r.content.decode('utf-8'):
                            continue
                        else:
                            sys.exit(1)

        # Get metadata.
        elif args['--formats']:
            for f in item.get_files():
                formats.add(f.format)
            if (i + 1) == len(args['<identifier>']):
                print('\n'.join(formats))

        # Dump JSON to stdout.
        else:
            metadata = json.dumps(item.item_metadata)
            print(metadata)

    # Edit metadata for items in bulk, using a spreadsheet as input.
    if args['--spreadsheet']:
        if not args['--priority']:
            args['--priority'] = -5
        with open(args['--spreadsheet'], 'r', newline='', encoding='utf-8') as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            responses = []
            for row in spreadsheet:
                if not row['identifier']:
                    continue
                item = session.get_item(row['identifier'])
                if row.get('file'):
                    del row['file']
                metadata = {k.lower(): v for k, v in row.items() if v}
                responses.append(modify_metadata(item, metadata, args))

            if all(r.status_code == 200 for r in responses):
                sys.exit(0)
            else:
                for r in responses:
                    if r.status_code == 200:
                        continue
                    # We still want to exit 0 if the non-200 is a
                    # "no changes to xml" error.
                    elif 'no changes' in r.content.decode('utf-8'):
                        continue
                    else:
                        sys.exit(1)
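A hedged sketch of the programmatic path the CLI above takes for --modify; the identifier and metadata are illustrative, and modify_metadata is the same Item method exercised in Example #11:

from internetarchive import get_item

item = get_item('nasa')  # illustrative identifier
r = item.modify_metadata({'foo': 'bar'})  # returns a requests.Response
print(r.status_code)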