Exemple #1
0
 def test_copy(self):
     """A copy equals its source until the source is modified afterwards."""
     headers = {
         'Accept': 'application/json',
         'user-Agent': 'requests',
     }
     original = CaseInsensitiveDict(headers)
     duplicate = original.copy()
     # Right after copying, both hold the same items.
     assert original == duplicate
     # Changing the original must not be reflected in the copy.
     original['changed'] = True
     assert original != duplicate
Exemple #2
0
 def test_copy(self):
     """Check that ``copy()`` yields an equal but independent dictionary."""
     source = CaseInsensitiveDict({
         'Accept': 'application/json',
         'user-Agent': 'requests',
     })
     clone = source.copy()
     # The clone starts out equal to the source ...
     assert source == clone
     # ... and stops being equal once only the source gains a key.
     source['changed'] = True
     assert source != clone
Exemple #3
0
class TestCaseInsensitiveDict:
    """Checks for a ``CaseInsensitiveDict`` that holds a single "Accept" entry."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Provide every test with a fresh dict containing only "Accept"."""
        headers = CaseInsensitiveDict()
        headers["Accept"] = "application/json"
        self.case_insensitive_dict = headers

    def test_list(self):
        """Iterating yields the key exactly as it was inserted."""
        keys = list(self.case_insensitive_dict)
        assert keys == ["Accept"]

    # Case variants of the stored key, shared by the lookup/deletion tests.
    possible_keys = pytest.mark.parametrize(
        "key",
        ("accept", "ACCEPT", "aCcEpT", "Accept"),
    )

    @possible_keys
    def test_getitem(self, key):
        """Lookup succeeds for every case variant of the key."""
        value = self.case_insensitive_dict[key]
        assert value == "application/json"

    @possible_keys
    def test_delitem(self, key):
        """Deleting through any case variant removes the entry."""
        del self.case_insensitive_dict[key]
        assert key not in self.case_insensitive_dict

    def test_lower_items(self):
        """``lower_items()`` exposes the key in lower case."""
        lowered = list(self.case_insensitive_dict.lower_items())
        assert lowered == [("accept", "application/json")]

    def test_repr(self):
        """``repr`` shows the key in its original casing."""
        text = repr(self.case_insensitive_dict)
        assert text == "{'Accept': 'application/json'}"

    def test_copy(self):
        """``copy()`` returns a distinct object that compares equal."""
        duplicate = self.case_insensitive_dict.copy()
        assert duplicate is not self.case_insensitive_dict
        assert duplicate == self.case_insensitive_dict

    @pytest.mark.parametrize(
        "other, result",
        (
            ({"AccePT": "application/json"}, True),
            ({}, False),
            (None, False),
        ),
    )
    def test_instance_equality(self, other, result):
        """Equality ignores key case and is False for empty/None operands."""
        is_equal = self.case_insensitive_dict == other
        assert is_equal is result
Exemple #4
0
class TestCaseInsensitiveDict:
    """Checks for a ``CaseInsensitiveDict`` that holds a single "Accept" entry."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """
        Build a fresh CaseInsensitiveDict with only the "Accept" header
        before each test.
        """
        fresh = CaseInsensitiveDict()
        fresh['Accept'] = 'application/json'
        self.case_insensitive_dict = fresh

    def test_list(self):
        """Iterating yields the key exactly as it was inserted."""
        keys = list(self.case_insensitive_dict)
        assert keys == ['Accept']

    # Case variants of the stored key, shared by the lookup/deletion tests.
    possible_keys = pytest.mark.parametrize(
        'key',
        ('accept', 'ACCEPT', 'aCcEpT', 'Accept'),
    )

    @possible_keys
    def test_getitem(self, key):
        """Lookup succeeds for every case variant of the key."""
        value = self.case_insensitive_dict[key]
        assert value == 'application/json'

    @possible_keys
    def test_delitem(self, key):
        """Deleting through any case variant removes the entry."""
        del self.case_insensitive_dict[key]
        assert key not in self.case_insensitive_dict

    def test_lower_items(self):
        """``lower_items()`` exposes the key in lower case."""
        lowered = list(self.case_insensitive_dict.lower_items())
        assert lowered == [('accept', 'application/json')]

    def test_repr(self):
        """``repr`` shows the key in its original casing."""
        text = repr(self.case_insensitive_dict)
        assert text == "{'Accept': 'application/json'}"

    def test_copy(self):
        """``copy()`` returns a distinct object that compares equal."""
        duplicate = self.case_insensitive_dict.copy()
        assert duplicate is not self.case_insensitive_dict
        assert duplicate == self.case_insensitive_dict

    @pytest.mark.parametrize(
        'other, result', (
            ({'AccePT': 'application/json'}, True),
            ({}, False),
            (None, False)
        )
    )
    def test_instance_equality(self, other, result):
        """Equality ignores key case and is False for empty/None operands."""
        is_equal = self.case_insensitive_dict == other
        assert is_equal is result
Exemple #5
0
class Client:
    DEFAULT_PROTOCOL = "messagepack"

    def __init__(
        self,
        url,
        version=None,
        protocol=DEFAULT_PROTOCOL,
        path=None,
        request="",
        timeout=None,
        dnscache=None,
        headers=None,
        auth=None,
        stream=False,
        log=DUMMY_LOG,
        raise_exception=True,
    ):
        """Remember the connection settings used by later calls.

        Falsy ``path``, ``headers`` and ``dnscache`` values are replaced by an
        empty list, an empty header mapping and a new ``DNSCache``.
        """
        # Where to send requests.
        self._url = url
        self._version = version
        self._protocol = protocol  # FIXME: check validity
        self._path = path or []

        # How to send them.
        self._request = request
        self._timeout = timeout
        self._headers = CaseInsensitiveDict(headers or {})
        self._auth = auth
        self._stream = stream
        self._log = log
        self._raise_exception = raise_exception

        # Fall back to a private DNS cache when none was supplied.
        self._dnscache = dnscache or DNSCache()

    def _get_state(self):
        """Return the constructor keyword arguments describing this client."""
        return {
            "url": self._url,
            "version": self._version,
            "protocol": self._protocol,
            "path": self._path,
            "request": self._request,
            "timeout": self._timeout,
            "dnscache": self._dnscache,
            "headers": self._headers,
            "auth": self._auth,
            "stream": self._stream,
            "log": self._log,
            "raise_exception": self._raise_exception,
        }

    def _copy(self, **kwargs):
        """Create a new ``Client`` from this one's state, overridden by ``kwargs``."""
        return Client(**{**self._get_state(), **kwargs})

    def _prepare_request(self, post_body, get_params=None):
        """Build the URL and headers for a call.

        Returns a ``(url, post_body, headers)`` tuple. ``post_body`` is passed
        through unchanged; it is only handed to the auth object for signing.
        """
        headers = self._headers.copy()

        incoming = self._request
        if incoming:
            # Forward only the "x-kwikapi-" headers of the originating request.
            for name, value in incoming.headers.items():
                if name.lower().startswith("x-kwikapi-"):
                    headers[name] = value

            headers[REQUEST_ID_HEADER] = incoming.id

        headers[PROTOCOL_HEADER] = self._protocol

        # Version (if any) followed by the path parts, empty parts dropped.
        segments = [part for part in [self._version] + self._path if part]
        url = urljoin(self._url, "/".join(segments))

        if get_params:
            url = "{}?{}".format(url, urlencode(get_params))

        url = self._dnscache.map_url(url)

        signer = self._auth
        if signer:
            signer.sign(url, headers, post_body)

        return url, post_body, headers

    def _make_request(self, url, post_body, headers):
        """Send the request and return the decoded response.

        The protocol used for decoding is taken from the
        ``X-KwikAPI-Protocol`` response header, falling back to this client's
        own protocol. In stream mode a generator over the decoded items is
        returned; otherwise the whole body is read and decoded at once.

        The ``timeout`` given to the constructor is applied to the request
        when it is not ``None``.
        """
        req = urllib.request.Request(url, data=post_body, headers=headers)

        # Pass the timeout only when one was configured, so that urlopen's
        # own default (the global socket timeout) still applies otherwise.
        if self._timeout is None:
            res = urllib.request.urlopen(req)
        else:
            res = urllib.request.urlopen(req, timeout=self._timeout)

        proto = PROTOCOLS[res.headers.get("X-KwikAPI-Protocol",
                                          self._protocol)]

        if self._stream:
            res = proto.deserialize_stream(res)
            res = Client._extract_stream_response(res, self._raise_exception)
        else:
            res = self._deserialize_response(res.read(), proto,
                                             self._raise_exception)

        return res

    @staticmethod
    def _deserialize_response(data, proto, raise_exception=True):
        """Decode ``data`` with ``proto`` (name or protocol object) and unwrap it."""
        # Accept either a protocol name or an already resolved protocol object.
        handler = Client._get_protocol(proto)
        decoded = handler.deserialize(data)
        return Client._extract_response(decoded, raise_exception)

    @staticmethod
    def _get_protocol(proto):
        """Resolve a protocol name via ``PROTOCOLS``; return anything else as is."""
        return PROTOCOLS[proto] if isinstance(proto, str) else proto

    @staticmethod
    def _extract_response(r, raise_exception=True):
        """Unwrap a decoded response mapping.

        Returns ``r["result"]`` on success. On failure the "success" key is
        removed from ``r`` and a ``ResponseError`` built from the remainder is
        raised, or returned when ``raise_exception`` is false.
        """
        if r["success"]:
            return r["result"]

        r.pop("success")
        error = ResponseError(r)
        if raise_exception:
            raise error
        return error

    @staticmethod
    def _extract_stream_response(res, raise_exception=True):
        """Lazily unwrap every item of a decoded response stream."""
        unwrap = Client._extract_response
        for item in res:
            yield unwrap(item, raise_exception)

    @staticmethod
    def _serialize_params(params, protocol):
        """Serialize ``params`` with the protocol registered under ``protocol``."""
        return PROTOCOLS[protocol].serialize(params)

    def __call__(self, *args, **kwargs):
        """Invoke the remote function, or reconfigure the client.

        With an empty path the keyword arguments are treated as constructor
        overrides and a new ``Client`` is returned. Otherwise they are the
        parameters of the remote call and its result is returned.
        Positional arguments raise ``NonKeywordArgumentsError``.
        """
        if args:
            raise NonKeywordArgumentsError(args)

        if not self._path:
            return self._copy(**kwargs)

        # FIXME: support streaming in both directions
        loggable = get_loggable_params(kwargs or {})

        self._log.debug(
            "kwikapi.client.__call__",
            path=self._path,
            kwargs=loggable,
            url=self._url,
            version=self._version,
            protocol=self._protocol,
        )

        body = self._serialize_params(kwargs, self._protocol)
        url, body, headers = self._prepare_request(body)
        return self._make_request(url, body, headers)

    def __getattr__(self, attr):
        """Return a new client whose path is extended by ``attr``.

        Only called for names not found through normal lookup. Dunder names
        raise ``AttributeError`` so that protocol probes (``copy``, ``pickle``,
        ``hasattr(obj, "__x__")``) see a missing attribute rather than a new
        client. An instance whose ``_path`` has not been set yet (for example
        one created via ``__new__``) also raises ``AttributeError`` instead of
        recursing forever.
        """
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(attr)

        try:
            # Read from the instance dict directly: ``self._path`` would call
            # __getattr__ again if the attribute does not exist yet.
            path = self.__dict__["_path"]
        except KeyError:
            raise AttributeError(attr) from None

        return self._copy(path=path + [attr])
Exemple #6
0
class BasePixivAPI(object):
    client_id = 'MOBrBDS8blbauoSck0ZfDbtuzpyT'
    client_secret = 'lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj'
    hash_secret = '28c1fdd170a5204386cb1313c7077b34f83e4aaf4aa829ce78c231e05b0bae2c'

    def __init__(self, **requests_kwargs):
        """Set up the HTTP session and remember extra request kwargs.

        A ``headers`` entry in ``requests_kwargs`` is removed and kept as the
        additional headers; the remaining kwargs are stored as-is.
        """
        # Take the headers out first: requests_call passes headers explicitly
        # next to **requests_kwargs.
        initial_headers = requests_kwargs.pop('headers', {})
        self.additional_headers = CaseInsensitiveDict(initial_headers)
        self.requests_kwargs = requests_kwargs

        # No credentials until set_auth()/auth() is called.
        self.user_id = 0
        self.access_token = None
        self.refresh_token = None

        # self.requests = requests.Session()
        self.requests = cloudscraper.create_scraper()  # fix due to #140

    def set_additional_headers(self, headers):
        """Replace the additional headers with ``headers``.

        These headers are merged into every request made via requests_call().
        """
        replacement = CaseInsensitiveDict(headers)
        self.additional_headers = replacement

    # 设置HTTP的Accept-Language (用于获取tags的对应语言translated_name)
    # language: en-us, zh-cn, ...
    def set_accept_language(self, language):
        """set header Accept-Language for all requests (useful for get tags.translated_name)"""
        self.additional_headers['Accept-Language'] = language

    @classmethod
    def parse_json(cls, json_str):
        """Decode ``json_str``, building every JSON object through JsonDict."""
        parsed = json.loads(json_str, object_hook=JsonDict)
        return parsed

    def require_auth(self):
        if self.access_token is None:
            raise PixivError(
                'Authentication required! Call login() or set_auth() first!')

    def requests_call(self,
                      method,
                      url,
                      headers=None,
                      params=None,
                      data=None,
                      stream=False):
        """ requests http/https call for Pixiv API """
        merged_headers = self.additional_headers.copy()
        if headers:
            # Use the headers in the parameter to override the
            # additional_headers setting.
            merged_headers.update(headers)
        try:
            if method == 'GET':
                return self.requests.get(url,
                                         params=params,
                                         headers=merged_headers,
                                         stream=stream,
                                         **self.requests_kwargs)
            elif method == 'POST':
                return self.requests.post(url,
                                          params=params,
                                          data=data,
                                          headers=merged_headers,
                                          stream=stream,
                                          **self.requests_kwargs)
            elif method == 'DELETE':
                return self.requests.delete(url,
                                            params=params,
                                            data=data,
                                            headers=merged_headers,
                                            stream=stream,
                                            **self.requests_kwargs)
        except Exception as e:
            raise PixivError('requests %s %s error: %s' % (method, url, e))

        raise PixivError('Unknown method: %s' % method)

    def set_auth(self, access_token, refresh_token=None):
        self.access_token = access_token
        self.refresh_token = refresh_token

    def login(self, username, password):
        return self.auth(username=username, password=password)

    def set_client(self, client_id, client_secret):
        self.client_id = client_id
        self.client_secret = client_secret

    def auth(self,
             username=None,
             password=None,
             refresh_token=None,
             headers=None):
        """Login with password, or use the refresh_token to acquire a new bearer token.

        Username and password are used when both are given; otherwise the
        ``refresh_token`` argument, or failing that the stored refresh token.
        On success ``user_id``, ``access_token`` and ``refresh_token`` are
        updated on the instance.

        :param username: account name for the password grant
        :param password: account password for the password grant
        :param refresh_token: refresh token for the refresh_token grant
        :param headers: extra headers for the token request; a supplied
            ``user-agent`` suppresses the default iOS app headers
        :return: the parsed auth/token response
        :raises PixivError: if no credentials are available, the status code
            is not 200/301/302, or the response body is not valid JSON
        """
        local_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S+00:00')
        headers = CaseInsensitiveDict(headers or {})
        headers['x-client-time'] = local_time
        # Request signature: MD5 of the client time followed by the hash secret.
        headers['x-client-hash'] = hashlib.md5(
            (local_time + self.hash_secret).encode('utf-8')).hexdigest()
        # Allow mock UA due to #171: https://github.com/upbit/pixivpy/issues/171
        if 'user-agent' not in headers:
            headers['app-os'] = 'ios'
            headers['app-os-version'] = '14.6'
            headers['user-agent'] = 'PixivIOSApp/7.13.3 (iOS 14.6; iPhone13,2)'

        # noinspection PyUnresolvedReferences
        if not hasattr(self,
                       'hosts') or self.hosts == 'https://app-api.pixiv.net':
            auth_hosts = 'https://oauth.secure.pixiv.net'
        else:
            # noinspection PyUnresolvedReferences
            auth_hosts = self.hosts  # case where the API host was resolved to an IP (BAPI)
            # A custom host is in use, so name the real OAuth host explicitly.
            headers['host'] = 'oauth.secure.pixiv.net'
        url = '%s/auth/token' % auth_hosts
        data = {
            'get_secure_url': 1,
            'client_id': self.client_id,
            'client_secret': self.client_secret,
        }

        if username and password:
            data['grant_type'] = 'password'
            data['username'] = username
            data['password'] = password
        elif refresh_token or self.refresh_token:
            data['grant_type'] = 'refresh_token'
            data['refresh_token'] = refresh_token or self.refresh_token
        else:
            raise PixivError(
                '[ERROR] auth() but no password or refresh_token is set.')

        r = self.requests_call('POST', url, headers=headers, data=data)
        if r.status_code not in {200, 301, 302}:
            if data['grant_type'] == 'password':
                raise PixivError(
                    '[ERROR] auth() failed! check username and password.\nHTTP %s: %s'
                    % (r.status_code, r.text),
                    header=r.headers,
                    body=r.text,
                )
            else:
                raise PixivError(
                    '[ERROR] auth() failed! check refresh_token.\nHTTP %s: %s'
                    % (r.status_code, r.text),
                    header=r.headers,
                    body=r.text,
                )

        try:
            # get access_token
            token = self.parse_json(r.text)
        except json.JSONDecodeError as e:
            # Nothing was parsed at this point, so report the raw body rather
            # than a token object that does not exist yet.
            raise PixivError('Get access_token error! Response: %s' % r.text,
                             header=r.headers,
                             body=r.text) from e

        self.user_id = token.response.user.id
        self.access_token = token.response.access_token
        self.refresh_token = token.response.refresh_token

        # return auth/token response
        return token

    def download(self,
                 url,
                 prefix='',
                 path=os.path.curdir,
                 name=None,
                 replace=False,
                 fname=None,
                 referer='https://app-api.pixiv.net/'):
        """Download image to file (use 6.0 app-api)"""
        if hasattr(fname, 'write'):
            # A file-like object has been provided.
            file = fname
        else:
            # Determine file path by parameters.
            name = prefix + (name or fname or os.path.basename(url))
            file = os.path.join(path, name)
            if os.path.exists(file) and not replace:
                return False

        with self.requests_call('GET',
                                url,
                                headers={'Referer': referer},
                                stream=True) as response:
            if isinstance(file, str):
                with open(file, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
            else:
                shutil.copyfileobj(response.raw, file)
        return True
Exemple #7
0
class Site:
    """
    This object represents a MediaWiki API endpoint,
    e.g. https://en.wikipedia.org/w/api.php
    * url: Full url to site's api.php
    * session: current request.session object
    * log: an object that will be used for logging. ConsoleLog is created by default
    """

    def __init__(self, url, headers=None, session=None, logger=None,
                 json_object_hook=None):
        """
        Create a new Site object with a given MediaWiki API endpoint.
        You should always set a `User-Agent` header to identify your bot and allow
        site owner to contact you in case your bot misbehaves.
        When no User-Agent is given, one is derived from the path of the
        running script (or of the Python executable if that is unavailable).
        :param str url: API endpoint URL, e.g. https://en.wikipedia.org/w/api.php
        :param Union[dict, CaseInsensitiveDict] headers: Optional headers as a dict.
        :param requests.Session session: Allows user-supplied custom Session
            parameters, e.g. retries.
        :param logging.Logger logger: Optional logger object for custom log output.
            Defaults to the 'pywikiapi' logger with its level set to INFO.
        :param object json_object_hook: use this param to set a custom json object
            creator, e.g. pywikiapi.AttrDict. AttrDict allows direct property access
            to the result, e.g response.query.allpages in addition to
            response['query']['allpages']
        """
        if logger is not None:
            self.logger = logger
        else:
            self.logger = logging.getLogger('pywikiapi')
            self.logger.setLevel(logging.INFO)

        self.url = url
        self.session = session or requests.Session()
        self.json_object_hook = json_object_hook

        self.tokens = {}
        self.logged_in = False
        # This var will contain (username,password) after the .login()
        # in case of the login-on-demand mode
        self._loginOnDemand = False  # type: Union[Tuple[str, str], bool]
        self._is_bot = None  # Will be set by the is_bot()

        self.no_ssl = False  # For non-ssl sites, might be needed to avoid HTTPS
        self.maxlag = 5  # See https://www.mediawiki.org/wiki/Manual:Maxlag_parameter
        # If request is bigger than this, use POST instead
        self.auto_post_min_size = 2000
        # Number of retries to do in case of the lag error.
        # 0 - don't retry. negative - infinite.
        self.retry_on_lag_error = 10

        request_headers = CaseInsensitiveDict()
        if headers:
            request_headers.update(headers)
        if u'User-Agent' not in request_headers:
            # Prefer the running script; fall back to the interpreter binary
            # when there is no __main__ module or it has no __file__.
            try:
                script = Path(sys.modules['__main__'].__file__)
            except (KeyError, AttributeError):
                script = Path(sys.executable)
            request_headers[u'User-Agent'] = \
                f'{script.parent.parent.name}-{script.name} pywikiapi/4.3.0'
        self.headers = request_headers

    def __call__(self, action, **kwargs):
        """
            Make an API call with any arguments provided as named values:

                data = site('query', meta='siteinfo')

            By default uses GET request to the default URL set in the Site constructor.
            In case of an error, ApiError exception will be raised
            Any warnings will be logged via the logging interface

            While the server answers with a 'maxlag' error, the request is
            repeated after sleeping for the number of seconds given by the
            Retry-After response header (5 if absent), limited by
            `retry_on_lag_error`.

            :param str action : any of the MW API actions, e.g. 'query' and 'login'

            Several special "magic" parameters could be used to customize api call.
            Special parameters must be all CAPS to avoid collisions with the server API:
            :param POST: Use POST method when calling server API. Value is ignored.
            :param HTTPS: Force https (ssl) protocol for this request. Value is ignored.
            :param SSL: Same as HTTPS
            :param EXTRAS: Any extra parameters as passed to requests
                session.request(). Value is a dict()
            :param NO_LOGIN: do not attempt to do a login step if True

            :return: the parsed server response
            :raises ApiError: if the final response contains an 'error' element
        """
        # Deferred login: runs before any call other than 'login' itself,
        # unless the caller passed a truthy NO_LOGIN.
        if self._loginOnDemand and action != 'login' and (
            'NO_LOGIN' not in kwargs
            or not kwargs['NO_LOGIN']
        ):
            self.login(self._loginOnDemand[0], self._loginOnDemand[1])

        method, request_kw = self._prepare_call(action, kwargs)

        try_count = 0
        while True:
            try_count += 1
            response = self.request(method, **request_kw)
            data = self.parse_json(response)
            # Leave the loop unless the response is a 'maxlag' error; a missing
            # 'error' or 'code' key means there is nothing to retry.
            try:
                if data['error']['code'] != 'maxlag':
                    break
            except KeyError:
                break

            retry_after = float(response.headers.get('Retry-After', 5))
            # True once the (non-negative) retry budget is used up; a negative
            # retry_on_lag_error never satisfies this, i.e. retries forever.
            no_retry = 0 <= self.retry_on_lag_error < try_count

            # Giving up is logged as a warning, a retry only as info.
            if self.logger.isEnabledFor(logging.WARNING if no_retry else logging.INFO):
                # X-Database-Lag: The number of seconds of lag of the most lagged slave
                message = "Server exceeded maxlag"
                if not no_retry:
                    message += f", retrying in {retry_after}s"
                if 'lag' in data['error']:
                    message += f", lag={data['error']['lag']}"
                message += f", API={self.url}"

                log = self.logger.warning if no_retry else self.logger.info
                # The dict is handed to the logger as its argument, carrying
                # the details in structured form.
                log(message, {
                    'code': 'maxlag-retry',
                    'retry-after': retry_after,
                    'lag': data['error']['lag'] if 'lag' in data['error'] else None,
                    'x-database-lag': response.headers.get('X-Database-Lag', 5)
                })

            if no_retry:
                # Fall through with the maxlag error still in `data`; it is
                # raised as ApiError below.
                break

            time.sleep(retry_after)

        # Handle success and failure
        if 'error' in data:
            raise ApiError('Server API Error', data['error'])
        if 'warnings' in data and self.logger.isEnabledFor(logging.WARNING):
            # One line per warning entry, the 'main' entry sorted first.
            message = '\n'.join((
                str(vv[1]['warnings'] if 'warnings' in vv[1] else vv[1])
                for vv in sorted(data['warnings'].items(),
                                 key=lambda v: '' if v[0] == 'main' else v[0])))
            self.logger.warning(message,
                                dict(code='server-warnings', warnings=data['warnings']))
        return data

    def _prepare_call(self, action, kwargs):
        """
        Prepares parameters before calling MW API
        :param str action: which MW API action to do
        :param dict kwargs: key-value parameters as passed to the self.__call__()
        :return:
        """
        # Magic CAPS parameters
        method = 'POST' if 'POST' in kwargs or action in ['login', 'edit'] else 'GET'
        request_kw = dict() if 'EXTRAS' not in kwargs else kwargs['EXTRAS']
        request_kw['force_ssl'] = \
            not self.no_ssl and \
            (action == 'login' or 'SSL' in kwargs or 'HTTPS' in kwargs)
        # Clean up magic CAPS params as they shouldn't be passed to the server
        for k in ['POST', 'SSL', 'HTTPS', 'EXTRAS', 'NO_LOGIN']:
            if k in kwargs:
                del kwargs[k]

        def update_value(value):
            if value is None:
                return None
            if isinstance(value, datetime):
                # .isoformat() wouldn't work because it sometimes
                # produces +00:00 that MW does not support
                # Also perform sanity check here to make sure this is a UTC time
                if value.tzinfo is not None and value.tzinfo.utcoffset(value):
                    raise ValueError('datetime value has a non-UTC timezone')
                return value.strftime('%Y-%m-%dT%H:%M:%SZ')
            if isinstance(value, bool):
                return '1' if value else None
            return str(value)

        for k, val in list(kwargs.items()):
            # Support all iterables as lists except for strings
            try:
                iter(val)
                iterable = not isinstance(val, str)
            except TypeError:
                iterable = False
            if iterable:
                val = [update_value(v) for v in val]
                kwargs[k] = u'|'.join(filter(lambda v: v is not None, val))
            else:
                val = update_value(val)
                if val is not None:
                    kwargs[k] = val
                else:
                    del kwargs[k]
        # Make server call
        kwargs['action'] = action
        kwargs['format'] = 'json'
        if 'formatversion' not in kwargs:
            kwargs['formatversion'] = 2
        if self.maxlag is not None and 'maxlag' not in kwargs:
            kwargs['maxlag'] = self.maxlag

        # Estimate the size of the utf-8 encoded URL, and auto-switch to POST if too big
        data_size = sum(
            len(str(k).encode('utf-8')) + len(str(v).encode('utf-8')) + 2
            for k, v in kwargs.items())
        if data_size > self.auto_post_min_size:
            method = 'POST'

        if method == 'POST':
            request_kw['data'] = kwargs
        else:
            request_kw['params'] = kwargs

        return method, request_kw

    def login(self, user, password, on_demand=False):
        """
        :param str user: user login name
        :param str password: user password
        :param bool on_demand: postpone login until an actual API request is made
        """
        self.tokens = {}
        if on_demand:
            self._loginOnDemand = (user, password)
            return
        res = self('login', lgname=user, lgpassword=password,
                   lgtoken=self.token('login'))['login']
        if res['result'] != 'Success':
            raise ApiError('Login failed', res)
        self._loginOnDemand = False
        self.logged_in = True

    def is_bot(self) -> bool:
        """
        Checks if the current user account has the "bot" user right.
        """
        if self._is_bot is None:
            res = self('query', meta='userinfo', uiprop='rights')
            self._is_bot = 'bot' in res.query.userinfo.rights
        return self._is_bot

    def query(self, **kwargs):
        """
        Call Query API with given parameters, and yield all results returned
        by the server, properly handling result continuation.
        """
        return self.iterate('query', **kwargs)

    def iterate(self, action, **kwargs):
        """
        Call any "continuation" style MW API with given parameters, such as
        the 'query' API. Yields all results returned by the server, properly
        handling result continuation. Use generator.send({...}) to dynamically
        adjust next request's parameters with the new parameters.
        :param str action: MW API action, e.g. 'query'
        :param kwargs: any API parameters
        :return: yields each response from the server
        """
        if 'rawcontinue' in kwargs:
            raise ValueError("rawcontinue is not supported with query() function, "
                             "use object's __call__()")
        if 'formatversion' in kwargs:
            raise ValueError("version is not supported with query() function, "
                             "use object's __call__()")
        if 'continue' not in kwargs:
            kwargs['continue'] = ''
        req = kwargs
        req['formatversion'] = 2
        while True:
            result = self(action, **req)
            if action in result:
                adjustments = yield result[action]
            else:
                adjustments = None
            if 'continue' not in result:
                break
            # re-send all continue values in the next call
            req = kwargs.copy()
            req.update(result['continue'])
            if adjustments:
                req.update(adjustments)

    def query_pages(self, **kwargs):
        """
        Query the server and yield all page objects one by one.
        This method makes sure that results received in multiple responses are
        correctly merged together.
        If any of the pages change during iteration, ApiPagesModifiedError(list)
        will be thrown after all other pages have been processed and yielded.
        """
        # A dict with incomplete page objects
        incomplete = {}
        # A set of page ids that we will ignore because
        # they have been modified during iteration
        modified = set()
        missing = set()
        for result in self.query(**kwargs):
            if 'pages' not in result:
                raise ApiError('Missing pages element in query result', result)

            new_incomplete = {}
            for page in result['pages']:
                if 'missing' in page:
                    if page['title'] not in missing:
                        yield page
                        missing.add(page['title'])
                    continue
                page_id = page['pageid']
                if page_id in modified:
                    continue
                if page_id in incomplete:
                    p = incomplete[page_id]
                    del incomplete[page_id]
                    if 'lastrevid' in page and p['lastrevid'] != page['lastrevid']:
                        # someone else modified this page,
                        # it must be requested separately in a new query
                        modified.add(page_id)
                        continue
                    # Merge additional page data into the same dict
                    self._merge_page(p, page)
                else:
                    p = page
                new_incomplete[page_id] = p

            # Yield all pages that have not been mentioned in the last response
            for page_id, page in incomplete.items():
                yield page

            incomplete = new_incomplete

        # Iteration is done, all incomplete are thus complete
        for page_id, page in incomplete.items():
            yield page

        if modified:
            # some pages have been modified between api calls, notify caller
            raise ApiPagesModifiedError(list(modified))

    def _merge_page(self, a, b):
        """
        Recursively merge two page objects
        """
        for k in b:
            val = b[k]
            if k in a:
                if isinstance(val, dict):
                    self._merge_page(a[k], val)
                elif isinstance(val, list):
                    a[k] = a[k] + val
                else:
                    a[k] = val
            else:
                a[k] = val

    def token(self, token_type='csrf'):
        """
        Get an api token.
        :param str token_type:
        :return: str
        """
        if token_type not in self.tokens:
            res = self.query(meta='tokens', type=token_type,
                             NO_LOGIN=token_type == 'login')
            self.tokens[token_type] = next(res)['tokens'][token_type + 'token']
        return self.tokens[token_type]

    def request(self, method, force_ssl=False, headers=None, **request_kw):
        """Make a low level request to the server"""
        url = self.url
        if force_ssl:
            parts = list(urlparse.urlparse(url))
            parts[0] = 'https'
            url = urlparse.urlunparse(parts)
        if headers:
            h = self.headers.copy()
            h.update(headers)
            headers = h
        else:
            headers = self.headers

        r = self.session.request(method, url, headers=headers, **request_kw)
        if not r.ok:
            raise ApiError('Call failed', r)
        if self.logger.isEnabledFor(logging.DEBUG):
            message = f"Request: {r.request.url}\nResponse: {len(r.content):,} bytes"
            self.logger.debug(message, dict(
                code='server-response',
                url=r.request.url,
                headers=headers,
            ))
        return r

    def parse_json(self, value):
        """
        Utility function to convert server reply into a JSON object.
        By default, JSON objects support direct property access (JavaScript style)
        """
        if isinstance(value, str):
            # noinspection PyTypeChecker
            return json.loads(value, object_hook=self.json_object_hook)
        elif hasattr(value.__class__, 'json'):
            return value.json(object_hook=self.json_object_hook)
        else:
            # Our servers still have requests 0.8.2 ... :(
            # noinspection PyTypeChecker
            return json.loads(value.content, object_hook=self.json_object_hook)

    def __str__(self):
        res = self.url
        if self.logged_in:
            res += ' (logged in)'
        return res
Exemple #8
0
class Client:
    """
    Callable proxy for a remote HTTP API.

    Attribute access extends the request path: ``client.ns.fn`` yields a copy
    whose path is ``['ns', 'fn']``. Calling a client that has a path performs
    the request with the keyword arguments as parameters; calling a client
    without a path returns a copy reconfigured with those keyword arguments.
    """

    # NOTE(review): if the 'pickle' protocol unpickles server responses, it
    # must only be used with trusted servers - confirm against PROTOCOLS.
    DEFAULT_PROTOCOL = 'pickle'

    def __init__(self, url, version=None, protocol=DEFAULT_PROTOCOL,
            path=None, request='', timeout=None, dnscache=None,
            headers=None, auth=None, stream=False, log=DUMMY_LOG):
        """
        :param url: base URL of the API.
        :param version: optional version, used as the first URL path segment.
        :param protocol: key into ``PROTOCOLS`` used to (de)serialize payloads.
        :param path: list of path segments accumulated via attribute access.
        :param request: optional incoming request whose ``x-kwikapi-*`` headers
            and ``id`` are propagated to outgoing calls.
        :param timeout: optional timeout passed to ``urlopen``; ``None`` keeps
            the library default.
        :param dnscache: object providing ``map_url``; a ``DNSCache`` is created
            when none is given.
        :param headers: default headers for every call.
        :param auth: optional object whose ``sign(url, headers, body)`` is
            invoked before each request.
        :param stream: when true, responses are deserialized as a stream.
        :param log: logger exposing ``debug``.
        """
        headers = headers or {}

        self._url = url
        self._version = version
        self._protocol = protocol # FIXME: check validity

        self._path = path or []
        self._request = request
        self._timeout = timeout
        self._dnscache = dnscache
        self._headers = CaseInsensitiveDict(headers)
        self._auth = auth
        self._stream = stream
        self._log = log

        if not self._dnscache:
            self._dnscache = DNSCache()

    def _get_state(self):
        """Return the constructor keyword arguments that reproduce this client."""
        return dict(url=self._url, version=self._version,
            protocol=self._protocol, path=self._path,
            request=self._request, timeout=self._timeout,
            dnscache=self._dnscache, headers=self._headers,
            auth=self._auth, stream=self._stream,log=self._log)

    def _copy(self, **kwargs):
        """Build a new ``Client`` from the current state, overridden by ``kwargs``."""
        _kwargs = self._get_state()
        _kwargs.update(kwargs)
        return Client(**_kwargs)

    def _prepare_request(self, post_body, get_params=None):
        """
        Assemble the URL and headers for one call.

        :return: ``(url, post_body, headers)`` ready for ``_make_request``.
        """
        headers = self._headers.copy()

        if self._request:
            # Forward only the x-kwikapi-* headers of the originating request
            for hk, hv in self._request.headers.items():
                if not hk.lower().startswith('x-kwikapi-'):
                    continue
                headers[hk] = hv

            headers[REQUEST_ID_HEADER] = self._request.id

        headers[PROTOCOL_HEADER] = self._protocol

        # Empty segments (e.g. version=None) are dropped from the path
        upath = [self._version] + self._path
        upath = '/'.join(x for x in upath if x)
        url = urljoin(self._url, upath)

        if get_params:
            url = '{}?{}'.format(url, urlencode(get_params))

        url = self._dnscache.map_url(url)
        if self._auth:
            self._auth.sign(url, headers, post_body)

        return url, post_body, headers

    def _make_request(self, url, post_body, headers):
        """
        Perform the HTTP call and return the deserialized result.

        In streaming mode a generator over the response is returned and the
        connection stays open while it is consumed; otherwise the body is read
        fully and the response is closed before deserialization.
        """
        req = urllib.request.Request(url, data=post_body, headers=headers)
        if self._timeout is None:
            # Keep urlopen's own default (the global socket timeout)
            res = urllib.request.urlopen(req)
        else:
            res = urllib.request.urlopen(req, timeout=self._timeout)

        if self._stream:
            proto = PROTOCOLS[self._protocol]
            res = proto.deserialize_stream(res)
            return Client._extract_stream_response(res)

        # Release the connection as soon as the payload has been read
        with res:
            payload = res.read()
        return self._deserialize_response(payload, self._protocol)

    @staticmethod
    def _deserialize_response(data, protocol):
        """Deserialize ``data`` with ``protocol`` and unwrap the result envelope."""
        proto = PROTOCOLS[protocol]
        r = proto.deserialize(data)
        return Client._extract_response(r)

    @staticmethod
    def _extract_response(r):
        """
        Unwrap a response envelope.

        :return: ``r['result']`` when ``r['success']`` is truthy.
        :raises Exception: carrying ``r['message']`` otherwise.
        """
        success = r['success']
        if not success:
            raise Exception(r['message']) # FIXME: raise proper exc
        else:
            r = r['result']

        return r

    @staticmethod
    def _extract_stream_response(res):
        """Lazily unwrap every envelope produced by a streamed response."""
        for r in res:
            yield Client._extract_response(r)

    @staticmethod
    def _serialize_params(params, protocol):
        """Serialize call parameters with the given protocol."""
        proto = PROTOCOLS[protocol]
        data = proto.serialize(params)
        return data

    def __call__(self, *args, **kwargs):
        """
        Perform the remote call (when a path is set) or return a reconfigured copy.

        :raises TypeError: if positional arguments are given; only keyword
            arguments are supported.
        """
        if args:
            # An explicit exception instead of ``assert`` so the check
            # survives ``python -O``.
            raise TypeError('Client only accepts keyword arguments')

        if self._path:
            # FIXME: support streaming in both directions
            _kwargs = get_loggable_params(kwargs or {})

            self._log.debug('kwikapi.client.__call__',
                    path=self._path, kwargs=_kwargs,
                    url=self._url, version=self._version, protocol=self._protocol)

            post_body = self._serialize_params(kwargs, self._protocol)
            url, post_body, headers = self._prepare_request(post_body)
            res = self._make_request(url, post_body, headers)

            return res

        else:
            return self._copy(**kwargs)

    def __getattr__(self, attr):
        """
        Return a copy of this client with ``attr`` appended to the path.

        Dunder names are refused: protocol probes such as ``__deepcopy__`` or
        ``__setstate__`` would otherwise receive a callable ``Client`` (or
        recurse forever on an instance whose ``__init__`` has not run yet).
        """
        if attr.startswith('__') and attr.endswith('__'):
            raise AttributeError(attr)
        return self._copy(path=self._path + [attr])
Exemple #9
0
class Site(object):
    """
    Minimal client for a MediaWiki-style ``api.php`` endpoint.

    Public properties (member variables at the moment):
    * url: Full url to site's api.php
    * session: current request.session object
    * log: an object that will be used for logging. ConsoleLog is created by default
    * headers: default headers sent with every request
    * tokens: cache of tokens already fetched by token()
    * noSSL: set to True to never upgrade a request to HTTPS
    """

    def __init__(self, url, headers=None, session=None, log=None):
        """
        :param url: full URL of the site's api.php
        :param headers: optional headers merged over the default User-Agent
        :param session: session object to use; a new ``requests`` session by default
        :param log: logging object; a ``ConsoleLog`` by default
        """
        self._loginOnDemand = False
        self.session = session if session else requests.session()
        self.log = log if log else ConsoleLog()
        self.url = url
        self.tokens = {}
        self.noSSL = False  # For non-ssl sites, it might be needed to avoid HTTPS

        # Build a User-Agent from the running script's directory and file name
        try:
            script = os.path.abspath(sys.modules['__main__'].__file__)
        except (KeyError, AttributeError):
            script = sys.executable
        path, f = os.path.split(script)
        self.headers = CaseInsensitiveDict({u'User-Agent': u'%s-%s BareboneMWReq/0.1' % (os.path.basename(path), f)})
        if headers:
            self.headers.update(headers)

    def __call__(self, action, **kwargs):
        """
            Make an API call with any arguments provided as named values:

                data = site('query', meta='siteinfo')

            By default uses GET request to the default URL set in the Site constructor.
            In case of an error, ApiError exception will be raised
            Any warnings will be logged via the logging interface

            :param action: value of the API ``action`` parameter. 'login' and
                'edit' are always sent as POST; 'login' also forces HTTPS
                unless ``noSSL`` is set.

            List and tuple values are joined with '|' before being sent.

            Several special "magic" parameters could be used to customize api call.
            Special parameters must be all CAPS to avoid collisions with the server API:
            :param POST: Use POST method when calling server API. Value is ignored.
            :param HTTPS: Force https (ssl) protocol for this request. Value is ignored.
            :param SSL: Same as HTTPS.
            :param EXTRAS: Any extra parameters as passed to requests' session.request(). Value is a dict()
            :return: the parsed server reply
        """
        # Magic CAPS parameters
        method = 'POST' if 'POST' in kwargs or action in ['login', 'edit'] else 'GET'
        forceSSL = not self.noSSL and (action == 'login' or 'SSL' in kwargs or 'HTTPS' in kwargs)
        # Work on a copy: 'data'/'params' are added below and must not leak
        # into the dict object owned by the caller.
        request_kw = dict(kwargs['EXTRAS']) if 'EXTRAS' in kwargs else {}

        # Clean up magic CAPS params as they shouldn't be passed to the server
        for k in ['POST', 'SSL', 'HTTPS', 'EXTRAS']:
            kwargs.pop(k, None)

        for k, val in kwargs.items():
            # Only support the well known types.
            # Everything else should be client's responsibility
            if isinstance(val, (list, tuple)):
                kwargs[k] = '|'.join(val)

        # Make server call
        kwargs['action'] = action
        kwargs['format'] = 'json'

        if method == 'POST':
            request_kw['data'] = kwargs
        else:
            request_kw['params'] = kwargs

        # A postponed login is performed right before the first real request
        if self._loginOnDemand and action != 'login':
            self.login(self._loginOnDemand[0], self._loginOnDemand[1])

        data = parseJson(self.request(method, forceSSL=forceSSL, **request_kw))

        # Handle success and failure
        if 'error' in data:
            raise ApiError('Server API Error', data['error'])
        if 'warnings' in data:
            self.log(2, data['warnings'])
        return data

    def login(self, user, password, onDemand=False):
        """
        Log in to the site; any cached tokens are discarded.

        :param user: login name (sent as ``lgname``)
        :param password: password (sent as ``lgpassword``)
        :param onDemand: if True, will postpone login until an actual API request is made
        :return: None
        :raises ApiError: if the final login result is not 'Success'
        """
        self.tokens = {}
        if onDemand:
            self._loginOnDemand = (user, password)
            return
        res = self('login', lgname=user, lgpassword=password)['login']
        if res['result'] == 'NeedToken':
            # Second leg of the handshake: repeat the call with the issued token
            res = self('login', lgname=user, lgpassword=password, lgtoken=res['token'])['login']
        if res['result'] != 'Success':
            raise ApiError('Login failed', res)
        self._loginOnDemand = False

    def query(self, **kwargs):
        """
        Call Query API with given parameters, and yield all results returned
        by the server, properly handling result continuation.

        :raises ValueError: if ``rawcontinue`` is passed
        """
        if 'rawcontinue' in kwargs:
            raise ValueError("rawcontinue is not supported with query() function, use object's __call__()")
        if 'continue' not in kwargs:
            kwargs['continue'] = ''
        req = kwargs
        while True:
            result = self('query', **req)
            if 'query' in result:
                yield result['query']
            if 'continue' not in result:
                break
            # re-send all continue values in the next call
            req = kwargs.copy()
            req.update(result['continue'])

    def queryPages(self, **kwargs):
        """
        Query the server and return all page objects individually.

        Data for one page may arrive spread over several continuation batches;
        the pieces are merged and each page is yielded exactly once, as soon
        as a batch no longer mentions it (or when the query ends).

        :raises ApiError: if a result lacks the 'pages' element
        :raises ApiPagesModifiedError: after all pages were yielded, if some
            pages changed revision between batches; those pages are not yielded
        """
        incomplete = {}
        changed = set()
        for result in self.query(**kwargs):
            if 'pages' not in result:
                raise ApiError('Missing pages element in query result', result)

            finished = incomplete.copy()
            for pageId, page in result['pages'].items():
                if pageId in changed:
                    continue
                if pageId in incomplete:
                    del finished[pageId]  # If server returned it => not finished
                    p = incomplete[pageId]
                    if 'lastrevid' in page and p['lastrevid'] != page['lastrevid']:
                        # someone else modified this page, it must be requested anew separately
                        changed.add(pageId)
                        del incomplete[pageId]
                        continue
                    self._mergePage(p, page)
                else:
                    p = page
                incomplete[pageId] = p
            for pageId, page in finished.items():
                if pageId not in changed:
                    # Forget the page before handing it out; otherwise it would
                    # be yielded again by later batches and by the final loop.
                    incomplete.pop(pageId, None)
                    yield page

        for pageId, page in incomplete.items():
            yield page
        if changed:
            # some pages have been changed between api calls, notify caller
            raise ApiPagesModifiedError(list(changed))

    def _mergePage(self, a, b):
        """
        Recursively merge two page objects: ``b`` is merged into ``a`` in place.

        Dicts are merged key by key, lists are concatenated, and any other
        value in ``b`` replaces the one in ``a``.
        """
        for k in b:
            val = b[k]
            if k in a:
                if isinstance(val, dict):
                    self._mergePage(a[k], val)
                elif isinstance(val, list):
                    a[k] = a[k] + val
                else:
                    a[k] = val
            else:
                a[k] = val

    def token(self, tokenType='csrf'):
        """Return the token of the given type, fetching and caching it on first use."""
        if tokenType not in self.tokens:
            self.tokens[tokenType] = next(self.query(meta='tokens', type=tokenType))['tokens'][tokenType + 'token']
        return self.tokens[tokenType]

    def request(self, method, forceSSL=False, headers=None, **request_kw):
        """
        Make a low level request to the server

        :param method: HTTP verb passed to ``session.request``
        :param forceSSL: when true, the URL scheme is replaced with 'https'
        :param headers: extra headers layered over a copy of ``self.headers``
        :param request_kw: forwarded unchanged to ``session.request``
        :return: the response object
        :raises ApiError: if the response's ``ok`` attribute is falsy
        """
        url = self.url
        if forceSSL:
            parts = list(urlparse.urlparse(url))
            parts[0] = 'https'
            url = urlparse.urlunparse(parts)
        if headers:
            h = self.headers.copy()
            h.update(headers)
            headers = h
        else:
            headers = self.headers

        r = self.session.request(method, url, headers=headers, **request_kw)
        if not r.ok:
            raise ApiError('Call failed', r)

        if self.log.isEnabled(5):
            dbg = [r.request.url, headers]
            self.log(5, dbg)
        return r
Exemple #10
0
class BasePixivAPI(object):
    """
    Shared plumbing for Pixiv API clients: HTTP calls with merged headers,
    OAuth authentication and image download.
    """

    # OAuth client credentials sent by auth(); overridable via set_client()
    client_id = "MOBrBDS8blbauoSck0ZfDbtuzpyT"
    client_secret = "lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj"
    # Appended to the client time to compute the "x-client-hash" header
    hash_secret = "28c1fdd170a5204386cb1313c7077b34f83e4aaf4aa829ce78c231e05b0bae2c"

    def __init__(self, **requests_kwargs: Any) -> None:
        """
        Initialize requests kwargs if need be.

        A ``headers`` entry, if present, is removed from ``requests_kwargs``
        and becomes the additional headers sent with every call; the remaining
        kwargs are forwarded to every HTTP request.
        """
        self.user_id: Union[int, str] = 0
        self.access_token: Optional[str] = None
        self.refresh_token: Optional[str] = None
        self.hosts = "https://app-api.pixiv.net"

        # self.requests = requests.Session()
        self.requests = cloudscraper.create_scraper()  # fix due to #140
        self.additional_headers = CaseInsensitiveDict(
            requests_kwargs.pop("headers", {})
        )  # type: CaseInsensitiveDict[Any]
        self.requests_kwargs = requests_kwargs

    def set_additional_headers(self, headers: ParamDict) -> None:
        """manually specify additional headers. will overwrite API default headers in case of collision"""
        self.additional_headers = CaseInsensitiveDict(headers)

    # Set the HTTP Accept-Language header (used to obtain tags' translated_name
    # in the matching language)
    # language: en-us, zh-cn, ...
    def set_accept_language(self, language: str) -> None:
        """set header Accept-Language for all requests (useful for get tags.translated_name)"""
        self.additional_headers["Accept-Language"] = language

    @classmethod
    def parse_json(cls, json_str: str) -> ParsedJson:
        """parse str into JsonDict"""
        return json.loads(json_str, object_hook=JsonDict)

    def require_auth(self) -> None:
        """Raise PixivError unless an access token has been set."""
        if self.access_token is None:
            raise PixivError(
                "Authentication required! Call login() or set_auth() first!"
            )

    def requests_call(
        self,
        method,
        url,
        headers=None,
        params=None,
        data=None,
        stream=False,
    ):
        # type: (str, str, Union[ParamDict, CaseInsensitiveDict[Any]], ParamDict, ParamDict, bool) -> Response
        """
        requests http/https call for Pixiv API

        ``headers`` are merged over a copy of ``self.additional_headers``.
        Supported methods are "GET", "POST" and "DELETE" (``data`` is not sent
        with "GET").

        :raises PixivError: for an unknown method or any exception raised by
            the underlying HTTP call.
        """
        merged_headers = self.additional_headers.copy()
        if headers:
            # Use the headers in the parameter to override the
            # additional_headers setting.
            merged_headers.update(headers)
        try:
            if method == "GET":
                return self.requests.get(
                    url,
                    params=params,
                    headers=merged_headers,
                    stream=stream,
                    **self.requests_kwargs
                )
            elif method == "POST":
                return self.requests.post(
                    url,
                    params=params,
                    data=data,
                    headers=merged_headers,
                    stream=stream,
                    **self.requests_kwargs
                )
            elif method == "DELETE":
                return self.requests.delete(
                    url,
                    params=params,
                    data=data,
                    headers=merged_headers,
                    stream=stream,
                    **self.requests_kwargs
                )
            else:
                raise PixivError("Unknown method: %s" % method)
        except Exception as e:
            # Keep the original exception reachable through __cause__
            raise PixivError("requests %s %s error: %s" % (method, url, e)) from e

    def set_auth(self, access_token: str, refresh_token: Optional[str] = None) -> None:
        """Store externally obtained tokens without contacting the server."""
        self.access_token = access_token
        self.refresh_token = refresh_token

    def login(self, username: str, password: str) -> Any:
        """Authenticate with username and password; see auth()."""
        return self.auth(username=username, password=password)

    def set_client(self, client_id: str, client_secret: str) -> None:
        """Override the OAuth client credentials used by auth()."""
        self.client_id = client_id
        self.client_secret = client_secret

    def auth(
        self,
        username: Optional[str] = None,
        password: Optional[str] = None,
        refresh_token: Optional[str] = None,
        headers: ParamDict = None,
    ) -> ParsedJson:
        """
        Login with password, or use the refresh_token to acquire a new bearer token

        On success ``user_id``, ``access_token`` and ``refresh_token`` are
        updated from the response.

        :param username: account name; used together with ``password``
        :param password: account password
        :param refresh_token: token to refresh with; falls back to
            ``self.refresh_token`` when omitted
        :param headers: extra request headers; a "user-agent" given here
            suppresses the built-in app headers
        :return: the parsed auth/token response
        :raises PixivError: if no credentials are available, the HTTP status is
            not 200/301/302, or the response body is not valid JSON
        """
        local_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
        headers_ = CaseInsensitiveDict(headers or {})
        headers_["x-client-time"] = local_time
        headers_["x-client-hash"] = hashlib.md5(
            (local_time + self.hash_secret).encode("utf-8")
        ).hexdigest()
        # Allow mock UA due to #171: https://github.com/upbit/pixivpy/issues/171
        if "user-agent" not in headers_:
            headers_["app-os"] = "ios"
            headers_["app-os-version"] = "14.6"
            headers_["user-agent"] = "PixivIOSApp/7.13.3 (iOS 14.6; iPhone13,2)"

        # noinspection PyUnresolvedReferences
        if not hasattr(self, "hosts") or self.hosts == "https://app-api.pixiv.net":
            auth_hosts = "https://oauth.secure.pixiv.net"
        else:
            # noinspection PyUnresolvedReferences
            auth_hosts = self.hosts  # case where the API host was resolved to an IP (BAPI)
            headers_["host"] = "oauth.secure.pixiv.net"
        url = "%s/auth/token" % auth_hosts
        data = {
            "get_secure_url": 1,
            "client_id": self.client_id,
            "client_secret": self.client_secret,
        }

        if username and password:
            data["grant_type"] = "password"
            data["username"] = username
            data["password"] = password
        elif refresh_token or self.refresh_token:
            data["grant_type"] = "refresh_token"
            data["refresh_token"] = refresh_token or self.refresh_token
        else:
            raise PixivError("[ERROR] auth() but no password or refresh_token is set.")

        # Send the headers assembled above (client time/hash, user agent, host
        # override), not the raw ``headers`` argument.
        r = self.requests_call("POST", url, headers=headers_, data=data)
        if r.status_code not in {200, 301, 302}:
            if data["grant_type"] == "password":
                raise PixivError(
                    "[ERROR] auth() failed! check username and password.\nHTTP %s: %s"
                    % (r.status_code, r.text),
                    header=r.headers,
                    body=r.text,
                )
            else:
                raise PixivError(
                    "[ERROR] auth() failed! check refresh_token.\nHTTP %s: %s"
                    % (r.status_code, r.text),
                    header=r.headers,
                    body=r.text,
                )

        token = None
        try:
            # get access_token
            token = self.parse_json(r.text)
            self.user_id = token.response.user.id
            self.access_token = token.response.access_token
            self.refresh_token = token.response.refresh_token
        except json.JSONDecodeError as e:
            raise PixivError(
                "Get access_token error! Response: %s" % token,
                header=r.headers,
                body=r.text,
            ) from e

        # return auth/token response
        return token

    def download(
        self,
        url: str,
        prefix: str = "",
        path: str = os.path.curdir,
        name: Optional[str] = None,
        replace: bool = False,
        fname: Optional[Union[str, IO[bytes]]] = None,
        referer: str = "https://app-api.pixiv.net/",
    ) -> bool:
        """
        Download image to file (use 6.0 app-api)

        :param url: address of the image
        :param prefix: text prepended to the target file name
        :param path: directory in which the file is created
        :param name: target file name; defaults to ``fname`` or the URL basename
        :param replace: overwrite an existing file instead of skipping it
        :param fname: alternative file name, or a writable file-like object
            that receives the data directly
        :param referer: value of the Referer header sent with the request
        :return: False if the target file exists and ``replace`` is false,
            True once the data has been written
        """
        if hasattr(fname, "write"):
            # A file-like object has been provided.
            file = fname
        else:
            # Determine file path by parameters.
            name = prefix + str(name or fname or os.path.basename(url))
            file = os.path.join(path, name)
            if os.path.exists(file) and not replace:
                return False

        with self.requests_call(
            "GET", url, headers={"Referer": referer}, stream=True
        ) as response:
            if isinstance(file, str):
                with open(file, "wb") as out_file:
                    shutil.copyfileobj(response.raw, out_file)
            else:
                shutil.copyfileobj(response.raw, file)  # type: ignore[arg-type]
        return True