def __init__(self, item, name, file_metadata=None):
    """Bind a file entry to its parent item and precompute its download URL.

    :type item: Item
    :param item: The item that the file is part of.

    :type name: str
    :param name: The filename of the file.

    :type file_metadata: dict
    :param file_metadata: (optional) a dict of metadata for the given file.
    """
    if six.PY2:
        # On Python 2 the name may arrive as a byte string, so try to turn
        # it into text. A UnicodeEncodeError means the decode could not be
        # applied to this value; in that case the name is kept unchanged.
        try:
            name = name.decode('utf-8')
        except UnicodeEncodeError:
            pass

    super(File, self).__init__(item.item_metadata, name, file_metadata)
    self.item = item

    session = item.session
    # ``self.identifier`` is presumably populated by the base initializer
    # above -- confirm against the parent class.
    self.url = '{protocol}//{host}/download/{id}/{name}'.format(
        protocol=session.protocol,
        id=self.identifier,
        name=urllib.parse.quote(name.encode('utf-8')),
        host=session.host,
    )

    # Requests for this file are only signed when the session carries both
    # halves of the S3 key pair.
    has_keys = session.access_key and session.secret_key
    self.auth = (
        auth.S3Auth(session.access_key, session.secret_key)
        if has_keys else None
    )
def get_task_log(task_id, session, request_kwargs=None):
    """Fetch the log of a single task, given only its task id.

    This exists so a task log can be retrieved without first retrieving
    the item's task history.

    :type task_id: str or int
    :param task_id: The task id for the task log you'd like to fetch.

    :type session: :class:`ArchiveSession <ArchiveSession>`
    :param session: Session object providing ``protocol``, ``host``,
                    ``access_key``, ``secret_key`` and ``get``.

    :type request_kwargs: dict
    :param request_kwargs: (optional) Keyword arguments that
                           :py:class:`requests.Request` takes.

    :rtype: str
    :returns: The task log as a string.
    """
    extra_kwargs = request_kwargs or {}
    s3_auth = auth.S3Auth(session.access_key, session.secret_key)

    # Requests aimed at archive.org go to the catalogd host instead; any
    # other configured host is used unchanged.
    task_host = (
        'catalogd.archive.org' if session.host == 'archive.org'
        else session.host
    )
    endpoint = '{}//{}/services/tasks.php'.format(session.protocol, task_host)

    response = session.get(
        endpoint, params={'task_log': task_id}, auth=s3_auth, **extra_kwargs
    )
    response.raise_for_status()
    # surrogateescape keeps undecodable bytes instead of raising.
    return response.content.decode('utf-8', errors='surrogateescape')
def get_metadata(self, identifier: str, request_kwargs: MutableMapping | None = None): """Get an item's metadata from the `Metadata API <http://blog.archive.org/2013/07/04/metadata-api/>`__ :param identifier: Globally unique Archive.org identifier. :returns: Metadat API response. """ request_kwargs = request_kwargs or {} url = f'{self.protocol}//{self.host}/metadata/{identifier}' if 'timeout' not in request_kwargs: request_kwargs['timeout'] = 12 try: if self.access_key and self.secret_key: s3_auth = auth.S3Auth(self.access_key, self.secret_key) else: s3_auth = None resp = self.get(url, auth=s3_auth, **request_kwargs) resp.raise_for_status() except Exception as exc: error_msg = f'Error retrieving metadata from {url}, {exc}' logger.error(error_msg) raise type(exc)(error_msg) return resp.json()
def __init__(self, item, name, file_metadata=None):
    """Attach a file to its parent item and derive its download URL.

    :type item: Item
    :param item: The item that the file is part of.

    :type name: str
    :param name: The filename of the file.

    :type file_metadata: dict
    :param file_metadata: (optional) a dict of metadata for the given file.
    """
    super().__init__(item.item_metadata, name, file_metadata)
    self.item = item

    session = item.session
    # The file name is UTF-8 encoded and percent-quoted before it is placed
    # in the URL path.
    quoted_name = quote(name.encode('utf-8'))
    self.url = '{protocol}//{host}/download/{id}/{name}'.format(
        protocol=session.protocol,
        id=self.identifier,
        name=quoted_name,
        host=session.host,
    )

    # Without a complete key pair the file is accessed unauthenticated.
    if not (session.access_key and session.secret_key):
        self.auth = None
    else:
        self.auth = auth.S3Auth(session.access_key, session.secret_key)
def get_metadata(self, identifier, request_kwargs=None):
    """Get an item's metadata from the `Metadata API
    <http://blog.archive.org/2013/07/04/metadata-api/>`__

    :type identifier: str
    :param identifier: Globally unique Archive.org identifier.

    :type request_kwargs: dict
    :param request_kwargs: (optional) Extra keyword arguments forwarded to
                           ``self.get``. The dict is copied, never
                           modified. A ``timeout`` of 12 is supplied when
                           the caller does not provide one.

    :rtype: dict
    :returns: Metadata API response.

    :raises Exception: Any error raised while building the auth object,
                       sending the request or checking its status is logged
                       and re-raised as the same exception type, with a
                       message that includes the request URL.
    """
    # Copy before applying defaults so the caller's dict is not mutated
    # (it may be reused for other requests).
    request_kwargs = dict(request_kwargs) if request_kwargs else {}
    request_kwargs.setdefault('timeout', 12)

    url = '{0}//{1}/metadata/{2}'.format(self.protocol, self.host, identifier)
    try:
        # Only sign the request when both halves of the key pair are set.
        if self.access_key and self.secret_key:
            s3_auth = auth.S3Auth(self.access_key, self.secret_key)
        else:
            s3_auth = None
        resp = self.get(url, auth=s3_auth, **request_kwargs)
        resp.raise_for_status()
    except Exception as exc:
        error_msg = 'Error retrieving metadata from {0}, {1}'.format(
            url, exc)
        logger.error(error_msg)
        # Re-raise as the same type so existing ``except`` clauses still match.
        raise type(exc)(error_msg)
    return resp.json()
def get_auth_config(username, password):
    """Log in to Archive.org and collect the credentials for a config.

    Three requests are made: a login POST (to obtain the ``logged-in-*``
    cookies), a GET for the account's S3 keys, and a ``check_auth`` GET
    against the S3 endpoint to look up the screenname.

    :type username: str
    :param username: Value sent as ``username`` in the login form.

    :type password: str
    :param password: Value sent as ``password`` in the login form.

    :rtype: dict
    :returns: A dict with ``s3`` (``access``/``secret``), ``cookies``
              (``logged-in-user``/``logged-in-sig``) and ``general``
              (``screenname``) sections.

    :raises AuthenticationError: If the login did not yield a
                                 ``logged-in-sig`` cookie, if the S3 key
                                 response is empty or has no ``key`` entry,
                                 or if the ``check_auth`` response reports
                                 an error.
    """
    payload = {
        'username': username,
        'password': password,
        'remember': 'CHECKED',
        'action': 'login',
    }
    failure_msg = ('Authentication failed. '
                   'Please check your credentials and try again.')

    with requests.Session() as s:
        # Attach logged-in-* cookies to Session.
        u = 'https://archive.org/account/login.php'
        s.post(u, data=payload, cookies={'test-cookie': '1'})
        if 'logged-in-sig' not in s.cookies:
            raise AuthenticationError(failure_msg)

        # Get S3 keys.
        u = 'https://archive.org/account/s3.php'
        p = {'output_json': 1}
        r = s.get(u, params=p)
        j = r.json()
        # Validate the payload *before* indexing into it; otherwise a
        # missing ``key`` entry surfaces as KeyError/TypeError and this
        # check can never fire.
        if not j or not j.get('key'):
            raise AuthenticationError(failure_msg)
        access_key = j['key']['s3accesskey']
        secret_key = j['key']['s3secretkey']

        # Get user info (screenname).
        u = 'https://s3.us.archive.org'
        p = {'check_auth': 1}
        r = requests.get(u, params=p, auth=auth.S3Auth(access_key, secret_key))
        r.raise_for_status()
        j = r.json()
        if j.get('error'):
            raise AuthenticationError(j.get('error'))
        user_info = j['screenname']

        auth_config = {
            's3': {
                'access': access_key,
                'secret': secret_key,
            },
            'cookies': {
                'logged-in-user': s.cookies['logged-in-user'],
                'logged-in-sig': s.cookies['logged-in-sig'],
            },
            'general': {
                'screenname': user_info,
            }
        }

    return auth_config
def __init__(self, metadata=None, queue_derive=True, access_key=None,
             secret_key=None, **kwargs):
    """Set up an S3 request with its metadata and auth.

    :type metadata: dict
    :param metadata: (optional) Metadata stored on the request; ``None``
                     is replaced with a new empty dict.

    :type queue_derive: bool
    :param queue_derive: (optional) Stored as ``self.queue_derive``.

    :type access_key: str
    :param access_key: (optional) S3 access key, used only when the base
                       initializer leaves ``self.auth`` falsy.

    :type secret_key: str
    :param secret_key: (optional) S3 secret key, used together with
                       ``access_key``.

    :param kwargs: Remaining keyword arguments, passed to the base class
                   initializer.
    """
    super(S3Request, self).__init__(**kwargs)

    # Build S3 credentials only if the base initializer did not already
    # leave a truthy ``self.auth`` in place.
    if not self.auth:
        self.auth = auth.S3Auth(access_key, secret_key)

    self.queue_derive = queue_derive
    # A new dict per instance, so the default is never shared between requests.
    self.metadata = metadata if metadata is not None else {}
def get_user_info(access_key, secret_key):
    """Look up the Archive.org user that owns an IA-S3 key pair.

    :type access_key: str
    :param access_key: IA-S3 access_key to use when making the given request.

    :type secret_key: str
    :param secret_key: IA-S3 secret_key to use when making the given request.

    :rtype: dict
    :returns: The decoded JSON body of the ``check_auth`` response.

    :raises AuthenticationError: If the response carries a truthy
                                 ``error`` entry.
    """
    response = requests.get(
        'https://s3.us.archive.org',
        params={'check_auth': 1},
        auth=auth.S3Auth(access_key, secret_key),
    )
    response.raise_for_status()

    user_info = response.json()
    error = user_info.get('error')
    if error:
        raise AuthenticationError(error)
    return user_info
def get_user_info(access_key: str, secret_key: str) -> dict[str, str]:
    """Return details about the Archive.org user behind an IA-S3 key pair.

    :param access_key: IA-S3 access_key to use when making the given request.

    :param secret_key: IA-S3 secret_key to use when making the given request.

    :returns: Archive.org user info, as decoded from the JSON response.

    :raises AuthenticationError: If the response contains a truthy
                                 ``error`` entry.
    """
    s3_auth = auth.S3Auth(access_key, secret_key)
    resp = requests.get("https://s3.us.archive.org", params={"check_auth": 1}, auth=s3_auth)
    resp.raise_for_status()

    payload = resp.json()
    # The endpoint reports a problem inside the JSON body, so check it
    # separately from the HTTP status.
    failure = payload.get("error")
    if not failure:
        return payload
    raise AuthenticationError(failure)
def __init__(self, archive_session, request_kwargs=None):
    """
    Initialize :class:`Catalog <Catalog>` object.

    :type archive_session: :class:`ArchiveSession <ArchiveSession>`
    :param archive_session: An :class:`ArchiveSession <ArchiveSession>`
                            object.

    :type request_kwargs: dict
    :param request_kwargs: (optional) Keyword arguments to be used in
                           :meth:`requests.sessions.Session.get` and
                           :meth:`requests.sessions.Session.post` requests.
    """
    self.session = archive_session
    # A falsy value (None or an empty dict) becomes a new empty dict.
    self.request_kwargs = request_kwargs or {}
    # Tasks endpoint on the session's configured protocol and host.
    self.url = f'{self.session.protocol}//{self.session.host}/services/tasks.php'
    # Auth is always built from the session's key pair, whatever its values.
    self.auth = auth.S3Auth(archive_session.access_key,
                            archive_session.secret_key)