Esempio n. 1
0
    def __init__(self, item, name, file_metadata=None):
        """Set up a file belonging to ``item`` and derive its download URL.

        :type item: Item
        :param item: The item that the file is part of.

        :type name: str
        :param name: The filename of the file.

        :type file_metadata: dict
        :param file_metadata: (optional) a dict of metadata for the
                              given file.
        """
        if six.PY2:
            # On Python 2 the name is decoded from UTF-8. A
            # UnicodeEncodeError is ignored and the name is used as-is
            # (presumably it was already text -- confirm).
            try:
                name = name.decode('utf-8')
            except UnicodeEncodeError:
                pass
        super(File, self).__init__(item.item_metadata, name, file_metadata)
        self.item = item

        session = item.session
        # The filename is UTF-8 encoded and percent-quoted before it is
        # placed in the URL path.
        self.url = '{protocol}//{host}/download/{id}/{name}'.format(
            protocol=session.protocol,
            id=self.identifier,
            name=urllib.parse.quote(name.encode('utf-8')),
            host=session.host,
        )

        # Requests are only signed when the session has both keys.
        has_keys = session.access_key and session.secret_key
        self.auth = (auth.S3Auth(session.access_key, session.secret_key)
                     if has_keys else None)
Esempio n. 2
0
    def get_task_log(task_id, session, request_kwargs=None):
        """Static method for getting a task log, given a task_id.

        This method exists so a task log can be retrieved without
        retrieving the items task history first.

        :type task_id: str or int
        :param task_id: The task id for the task log you'd like to fetch.

        :type session: :class:`ArchiveSession <ArchiveSession>`
        :param session: The session used to send the request. Its
                        ``access_key``, ``secret_key``, ``host`` and
                        ``protocol`` attributes are read.

        :type request_kwargs: dict
        :param request_kwargs: (optional) Keyword arguments that
                               :py:class:`requests.Request` takes.

        :rtype: str
        :returns: The task log as a string.
        """
        extra_kwargs = request_kwargs or {}
        s3_auth = auth.S3Auth(session.access_key, session.secret_key)

        # Requests for the default host are sent to the catalogd host
        # instead; any other host is used unchanged.
        if session.host == 'archive.org':
            target_host = 'catalogd.archive.org'
        else:
            target_host = session.host

        endpoint = '{}//{}/services/tasks.php'.format(session.protocol,
                                                      target_host)
        response = session.get(endpoint,
                               params={'task_log': task_id},
                               auth=s3_auth,
                               **extra_kwargs)
        response.raise_for_status()
        # Undecodable bytes are kept as surrogate escapes rather than
        # raising.
        return response.content.decode('utf-8', errors='surrogateescape')
Esempio n. 3
0
    def get_metadata(self,
                     identifier: str,
                     request_kwargs: MutableMapping | None = None):
        """Get an item's metadata from the `Metadata API
        <http://blog.archive.org/2013/07/04/metadata-api/>`__

        :param identifier: Globally unique Archive.org identifier.

        :returns: Metadat API response.
        """
        request_kwargs = request_kwargs or {}
        url = f'{self.protocol}//{self.host}/metadata/{identifier}'
        if 'timeout' not in request_kwargs:
            request_kwargs['timeout'] = 12
        try:
            if self.access_key and self.secret_key:
                s3_auth = auth.S3Auth(self.access_key, self.secret_key)
            else:
                s3_auth = None
            resp = self.get(url, auth=s3_auth, **request_kwargs)
            resp.raise_for_status()
        except Exception as exc:
            error_msg = f'Error retrieving metadata from {url}, {exc}'
            logger.error(error_msg)
            raise type(exc)(error_msg)
        return resp.json()
Esempio n. 4
0
    def __init__(self, item, name, file_metadata=None):
        """Set up a file belonging to ``item`` and derive its download URL.

        :type item: Item
        :param item: The item that the file is part of.

        :type name: str
        :param name: The filename of the file.

        :type file_metadata: dict
        :param file_metadata: (optional) a dict of metadata for the
                              given file.
        """
        super().__init__(item.item_metadata, name, file_metadata)
        self.item = item

        session = item.session
        # The filename is UTF-8 encoded and percent-quoted before it is
        # placed in the URL path.
        self.url = '{protocol}//{host}/download/{id}/{name}'.format(
            protocol=session.protocol,
            id=self.identifier,
            name=quote(name.encode('utf-8')),
            host=session.host,
        )

        # Requests are only signed when the session has both keys.
        has_keys = session.access_key and session.secret_key
        self.auth = (auth.S3Auth(session.access_key, session.secret_key)
                     if has_keys else None)
Esempio n. 5
0
    def get_metadata(self, identifier, request_kwargs=None):
        """Get an item's metadata from the `Metadata API
        <http://blog.archive.org/2013/07/04/metadata-api/>`__

        :type identifier: str
        :param identifier: Globally unique Archive.org identifier.

        :type request_kwargs: dict
        :param request_kwargs: (optional) Keyword arguments forwarded to
                               ``self.get``. A ``timeout`` of 12 is used
                               when none is supplied. The dict passed in
                               is never modified.

        :rtype: dict
        :returns: Metadata API response.

        :raises Exception: Any error raised while building the auth,
                           sending the request, or checking the status is
                           logged and re-raised as the same exception
                           type, with the URL included in the message.
        """
        # Work on a copy so the default timeout does not leak into the
        # caller's dict.
        request_kwargs = dict(request_kwargs) if request_kwargs else {}
        request_kwargs.setdefault('timeout', 12)
        url = '{0}//{1}/metadata/{2}'.format(self.protocol, self.host,
                                             identifier)
        try:
            # Only sign the request when both keys are configured.
            if self.access_key and self.secret_key:
                s3_auth = auth.S3Auth(self.access_key, self.secret_key)
            else:
                s3_auth = None
            resp = self.get(url, auth=s3_auth, **request_kwargs)
            resp.raise_for_status()
        except Exception as exc:
            error_msg = 'Error retrieving metadata from {0}, {1}'.format(
                url, exc)
            logger.error(error_msg)
            raise type(exc)(error_msg)
        return resp.json()
Esempio n. 6
0
def get_auth_config(username, password):
    """Log in to Archive.org and collect the credentials for a config.

    Three requests are made: a login POST that yields the
    ``logged-in-*`` cookies, a GET for the account's IA-S3 keys, and a
    ``check_auth`` GET that yields the screenname.

    :param username: Value sent as the ``username`` login form field.

    :param password: Value sent as the ``password`` login form field.

    :rtype: dict
    :returns: A dict with ``s3`` (``access``/``secret``), ``cookies``
              (``logged-in-user``/``logged-in-sig``) and ``general``
              (``screenname``) sections.

    :raises AuthenticationError: If the login does not set the
                                 ``logged-in-sig`` cookie, if the S3 key
                                 response is empty or has no ``key``
                                 entry, or if the ``check_auth`` response
                                 contains an ``error``.
    """
    auth_failed_msg = ('Authentication failed. '
                       'Please check your credentials and try again.')
    payload = dict(
        username=username,
        password=password,
        remember='CHECKED',
        action='login',
    )

    with requests.Session() as s:
        # Attach logged-in-* cookies to Session.
        u = 'https://archive.org/account/login.php'
        s.post(u, data=payload, cookies={'test-cookie': '1'})
        if 'logged-in-sig' not in s.cookies:
            raise AuthenticationError(auth_failed_msg)

        # Get S3 keys.
        u = 'https://archive.org/account/s3.php'
        p = dict(output_json=1)
        r = s.get(u, params=p)
        j = r.json()
        # Validate the response *before* indexing into it, so a missing
        # ``key`` entry surfaces as AuthenticationError rather than a
        # KeyError/TypeError.
        if not j or not j.get('key'):
            raise AuthenticationError(auth_failed_msg)
        access_key = j['key']['s3accesskey']
        secret_key = j['key']['s3secretkey']

        # Get user info (screenname).
        u = 'https://s3.us.archive.org'
        p = dict(check_auth=1)
        r = requests.get(u, params=p, auth=auth.S3Auth(access_key, secret_key))
        r.raise_for_status()
        j = r.json()
        if j.get('error'):
            raise AuthenticationError(j.get('error'))
        user_info = j['screenname']

        auth_config = {
            's3': {
                'access': access_key,
                'secret': secret_key,
            },
            'cookies': {
                'logged-in-user': s.cookies['logged-in-user'],
                'logged-in-sig': s.cookies['logged-in-sig'],
            },
            'general': {
                'screenname': user_info,
            }
        }

    return auth_config
Esempio n. 7
0
    def __init__(self,
                 metadata=None,
                 queue_derive=True,
                 access_key=None,
                 secret_key=None,
                 **kwargs):
        """Initialize the request and attach S3-specific settings.

        :param metadata: (optional) Stored on ``self.metadata``; an
                         empty dict is used when ``None`` is given.

        :param queue_derive: (optional) Stored on ``self.queue_derive``.

        :param access_key: (optional) Used to build the S3 auth when the
                           parent constructor leaves ``self.auth`` falsy.

        :param secret_key: (optional) Used together with ``access_key``.

        :param kwargs: Forwarded unchanged to the parent constructor.
        """
        super(S3Request, self).__init__(**kwargs)

        # Only fall back to key-based auth when the parent constructor
        # did not already provide one.
        if not self.auth:
            self.auth = auth.S3Auth(access_key, secret_key)

        # A missing metadata argument becomes a fresh empty dict.
        self.metadata = metadata if metadata is not None else {}
        self.queue_derive = queue_derive
Esempio n. 8
0
def get_user_info(access_key, secret_key):
    """Returns details about an Archive.org user given an IA-S3 key pair.

    :type access_key: str
    :param access_key: IA-S3 access_key to use when making the given request.

    :type secret_key: str
    :param secret_key: IA-S3 secret_key to use when making the given request.

    :returns: The decoded JSON body of the ``check_auth`` response.

    :raises AuthenticationError: If the response contains an ``error``.
    """
    response = requests.get(
        'https://s3.us.archive.org',
        params={'check_auth': 1},
        auth=auth.S3Auth(access_key, secret_key),
    )
    response.raise_for_status()
    user_info = response.json()

    # The service reports a rejected key pair in the JSON body.
    error = user_info.get('error')
    if error:
        raise AuthenticationError(error)
    return user_info
Esempio n. 9
0
def get_user_info(access_key: str, secret_key: str) -> dict[str, str]:
    """Returns details about an Archive.org user given an IA-S3 key pair.

    :param access_key: IA-S3 access_key to use when making the given request.

    :param secret_key: IA-S3 secret_key to use when making the given request.

    :returns: Archive.org user info (the decoded JSON body of the
              ``check_auth`` response).

    :raises AuthenticationError: If the response contains an ``error``.
    """
    response = requests.get(
        "https://s3.us.archive.org",
        params={"check_auth": 1},
        auth=auth.S3Auth(access_key, secret_key),
    )
    response.raise_for_status()
    user_info = response.json()

    # The service reports a rejected key pair in the JSON body.
    error = user_info.get("error")
    if error:
        raise AuthenticationError(error)
    return user_info
Esempio n. 10
0
    def __init__(self, archive_session, request_kwargs=None):
        """
        Initialize :class:`Catalog <Catalog>` object.

        :type archive_session: :class:`ArchiveSession <ArchiveSession>`
        :param archive_session: An :class:`ArchiveSession <ArchiveSession>`
                                object. Its ``access_key``, ``secret_key``,
                                ``protocol`` and ``host`` are read here.

        :type request_kwargs: dict
        :param request_kwargs: (optional) Keyword arguments to be used
                               in :meth:`requests.sessions.Session.get`
                               and :meth:`requests.sessions.Session.post`
                               requests.
        """
        session = archive_session
        self.session = session
        self.auth = auth.S3Auth(session.access_key, session.secret_key)
        # Falsy values (None, empty dict) are normalized to a new dict.
        self.request_kwargs = request_kwargs or {}
        # Tasks endpoint on the session's host.
        self.url = f'{session.protocol}//{session.host}/services/tasks.php'