Exemple #1
0
 def _create_session(self):
     """
     Create an authorized GitHub API session using a token with available quota.

     When the quota is exceeded for a token, switch to another token and attempt
     to continue. If the quota is exceeded for all tokens, sleep until the token
     with the lowest wait time has its quota reset.

     :raises RuntimeError: If no token could be chosen (unexpected state).
     """
     min_wait_time = 9999
     chosen_token = None
     updated_tokens = copy.deepcopy(self._tokens)
     for t in self._tokens:
         # A fresh session per candidate token so _exceeded_api_quota()
         # checks the quota of *this* token.
         self._session = cachecontrol.CacheControl(requests.Session())
         self._session.headers['Authorization'] = 'token %s' % t
         has_wait, wait_time = self._exceeded_api_quota()
         if not has_wait:
             chosen_token = t
             min_wait_time = 0
             # if a token is chosen, move it to the end (round-robin rotation)
             updated_tokens.append(t)
             del updated_tokens[updated_tokens.index(t)]
             break
         if wait_time < min_wait_time:
             min_wait_time = wait_time
             chosen_token = t
             # if a token is chosen, move it to the end
             updated_tokens.append(t)
             del updated_tokens[updated_tokens.index(t)]
     self._tokens = updated_tokens
     # BUG FIX: compare against None explicitly; a falsy (empty-string) token
     # would previously have been mistaken for "no token chosen".
     if chosen_token is None:
         raise RuntimeError('Unexpected state: No GitHub token chosen in github.py.')
     log.debug('Chose token {}.'.format(chosen_token))
     if min_wait_time:
         # Sleep until the quota is reset. See https://developer.github.com/v3/#rate-limiting for more information.
         # BUG FIX: log.warning was called print-style with extra positional
         # arguments and no format placeholders, which fails at format time.
         log.warning('Sleeping until the GitHub API quota is reset in %s minutes.',
                     min_wait_time / 60)
         time.sleep(min_wait_time)
     self._session = cachecontrol.CacheControl(requests.Session())
     self._session.headers['Authorization'] = 'token %s' % chosen_token
Exemple #2
0
    async def post(self, request):
        """Verify a Google ID token and sign the user in.

        Expects an 'idtoken' request parameter. Returns a JSON response
        carrying the session id, which is also set as a cookie.
        Raises Unauthorized when the token fails verification.
        """
        args = validate_required_params(request.args, 'idtoken')

        # Strip any stray quote characters around the raw token value.
        token = args['idtoken'][0].replace("'", "").replace('"', '')
        # example from https://developers.google.com/identity/sign-in/web/backend-auth
        try:
            session = requests.session()
            # Cache Google's certificate fetches; certs change rarely.
            cached_session = cachecontrol.CacheControl(session)
            # Renamed to avoid shadowing the `request` handler parameter.
            transport_request = google.auth.transport.requests.Request(
                session=cached_session)
            id_info = id_token.verify_oauth2_token(token, transport_request,
                                                   CLIENT_ID)

            if id_info['iss'] not in [
                    'accounts.google.com', 'https://accounts.google.com'
            ]:
                raise ValueError('Wrong issuer.')

            user_id = id_info['sub']
            user_nickname = id_info['name']
            # Drop the '=s96-c' thumbnail-size suffix from the photo URL.
            user_photo = str(id_info['picture']).replace('=s96-c', '')
            session_token = self.db.sign_in_or_create_oauth_user(
                user_id, user_nickname, user_photo)
            response = json({'session_id': session_token})
            response.cookies['session_token'] = session_token
            return response

        except ValueError:
            # BUG FIX: removed the unreachable `pass` that followed this raise.
            raise Unauthorized('Token not accepted')
Exemple #3
0
    def process_google_token(cls, token: str) -> dict:
        """Validate a Google ID token and return/refresh the matching user.

        See https://developers.google.com/identity/sign-in/web/backend-auth
        """
        # Back the transport with a caching session so Google's signing
        # certificates are not re-downloaded on every verification.
        http_session = cachecontrol.CacheControl(requests.session())
        transport_request = google_requests.Request(session=http_session)

        try:
            account_info = id_token.verify_oauth2_token(
                token,
                transport_request,
                settings.GOOGLE_CLIENT_ID,
            )

            # Only tokens issued by the expected Google accounts URL are valid.
            if account_info.get('iss') != cls.GOOGLE_ACCOUNTS_URL:
                raise NotAuthenticated(**INVALID_GOOGLE_TOKEN_ISSUER)

            return UserService.create_or_update_for_social_networks(
                email=account_info.get('email'),
                first_name=account_info.get('first_name'),
                last_name=account_info.get('last_name'),
            )
        except (JSONDecodeError, TypeError, ValueError):
            # Any malformed or invalid token maps to a single auth error.
            raise NotAuthenticated(**INVALID_GOOGLE_TOKEN_ID)
Exemple #4
0
    def __init__(self,
                 base_url=None,
                 auth=None,
                 format=None,
                 append_slash=True,
                 session=None,
                 serializer=None,
                 cache=False):
        """Initialize the API client, filling in defaults for omitted pieces.

        base_url is the one required setting; everything else has a default.
        """
        serializer = Serializer(default=format) if serializer is None else serializer

        if session is None:
            # Only wrap in a caching layer when explicitly requested.
            session = (requests.session() if cache is False
                       else cachecontrol.CacheControl(requests.session()))

        if auth is not None:
            session.auth = auth

        self._store = {
            "base_url": base_url,
            "format": "json" if format is None else format,
            "append_slash": append_slash,
            "session": session,
            "serializer": serializer,
        }

        # Do some Checks for Required Values
        if self._store.get("base_url") is None:
            raise exceptions.ImproperlyConfigured("base_url is required")
Exemple #5
0
    async def _sign_in_with_token(self, request):
        """Verify a Google ID token from the request and create a session.

        Returns a JSON response carrying the session token, also set as a
        cookie. Raises Unauthorized when the token fails verification.
        """
        args = validate_required_params(request.args, 'idtoken')

        # Strip any stray quote characters around the raw token value.
        token = args['idtoken'][0].replace("'", "").replace('"', '')

        try:
            session = requests.session()
            # Cache Google's certificate fetches; certs change rarely.
            cached_session = cachecontrol.CacheControl(session)
            # Renamed to avoid shadowing the `request` handler parameter.
            transport_request = google.auth.transport.requests.Request(
                session=cached_session)
            id_info = id_token.verify_oauth2_token(token, transport_request,
                                                   self.client_id)

            if id_info['iss'] not in ['accounts.google.com',
                                      'https://accounts.google.com']:
                raise ValueError('Wrong issuer.')

            user_id = id_info['sub']
            user_nickname = id_info['name']
            # Drop the '=s96-c' thumbnail-size suffix from the photo URL.
            user_photo = str(id_info['picture']).replace('=s96-c', '')

            session_token = self._db_manager.sign_in_or_create_oauth_user(
                user_id, user_nickname, user_photo)

            # BUG FIX: `{'session_token', session_token}` built a *set*, not a
            # dict, so the JSON response body was malformed.
            response = json({'session_token': session_token})
            response.cookies['session_token'] = session_token
            return response

        except ValueError:
            # BUG FIX: removed the unreachable `pass` that followed this raise.
            raise Unauthorized('Token not accepted')
Exemple #6
0
def get_repo(owner, name, token, cache_path='~/.gh2/cache'):
    """Return the github3 repository *owner*/*name* via a disk-cached session."""
    # Wrap the client's session so API responses are cached on disk.
    file_cache = cachecontrol.caches.FileCache(os.path.expanduser(cache_path))
    gh = github3.GitHub(token=token)
    gh.session = cachecontrol.CacheControl(gh.session, cache=file_cache)
    return gh.repository(owner, name)
Exemple #7
0
def cli(ctx, url, token):
    """Log in to GitHub, fetch the repository named by *url* and its forks,
    and stash them on the click context for subcommands.
    """
    spinner = Halo(text="Login and fetch forks", spinner="dots")
    spinner.start()

    if token:
        gh = github3.login(token=token)
    else:
        user = click.prompt("username",
                            hide_input=False,
                            confirmation_prompt=False)
        password = click.prompt("Password",
                                hide_input=True,
                                confirmation_prompt=True)
        gh = github3.login(user, password=password)
    # Cache API responses for a day to stay under GitHub's rate limits.
    cachecontrol.CacheControl(gh.session,
                              cache=FileCache(".fork_work_cache"),
                              heuristic=OneDayHeuristic())

    login, repo = urlparse(url).path[1:].split("/")
    repository = gh.repository(login, repo)
    forks = repository.forks()

    spinner.stop()
    RepoCtx = namedtuple("Repo", ["repository", "forks", "gh"])
    # BUG FIX: previously passed the repo *name* string (`repo`) where the
    # namedtuple's `repository` field expects the repository object.
    ctx.obj = RepoCtx(repository, forks, gh)
    def on_post(self, req, resp):
        """Falcon handler: verify a Google ID token and return a session token.

        Requires an 'idtoken' request parameter; responds 200 with the session
        token in the body. Raises HTTPBadRequest when the parameter is missing
        and HTTPUnauthorized when verification fails.
        """
        if not validate_params(req.params, 'idtoken'):
            raise falcon.HTTPBadRequest(
                "oauth post requires 'idtoken' parameter")

        # Strip any stray quote characters around the raw token value.
        token = req.params['idtoken'].replace("'", "").replace('"', '')
        # example from https://developers.google.com/identity/sign-in/web/backend-auth
        try:
            session = requests.session()
            # Cache Google's certificate fetches; certs change rarely.
            cached_session = cachecontrol.CacheControl(session)
            # Renamed from `request` for clarity (falcon already passes `req`).
            transport_request = google.auth.transport.requests.Request(
                session=cached_session)
            id_info = id_token.verify_oauth2_token(token, transport_request,
                                                   CLIENT_ID)

            if id_info['iss'] not in [
                    'accounts.google.com', 'https://accounts.google.com'
            ]:
                raise ValueError('Wrong issuer.')

            user_id = id_info['sub']
            user_nickname = id_info['name']
            # Drop the '=s96-c' thumbnail-size suffix from the photo URL.
            user_photo = str(id_info['picture']).replace('=s96-c', '')
            session_token = self.db.sign_in_or_create_oauth_user(
                user_id, user_nickname, user_photo)
            resp.status = falcon.HTTP_OK
            resp.body = session_token

        except ValueError:
            # BUG FIX: removed the unreachable `pass` that followed this raise.
            raise falcon.HTTPUnauthorized('Token not accepted')
Exemple #9
0
    def request(self, method, url, headers=None, params=None, proxies=None, cache=True, verify=False, *args, **kwargs):
        """Issue an HTTP request through this session with SickRage defaults
        applied: gzip encoding, a random User-Agent, configured SSL
        verification, the global proxy, and on-disk response caching.

        :param method: HTTP verb, passed through to requests.
        :param url: Target URL; normalized before use.
        :param headers: Optional extra headers (mutated with defaults).
        :param params: Optional query parameters.
        :param proxies: Optional proxies mapping (global proxy may be added).
        :param cache: When true, route requests through a CacheControl wrapper
            backed by sessions.db (entries expire after 7 days).
        :param verify: SSL verification flag/path; overridden from config.
        :return: The requests Response; HTTP errors are logged, not raised.
        """
        if headers is None: headers = {}
        if params is None: params = {}
        if proxies is None: proxies = {}

        url = self.normalize_url(url)

        headers.update({'Accept-Encoding': 'gzip, deflate'})
        # Rotate the User-Agent on every request.
        headers.update(random.choice(USER_AGENTS))

        # request session ssl verify
        if sickrage.srCore.srConfig.SSL_VERIFY:
            try:
                verify = certifi.where()
            except:
                pass

        # request session proxies
        if 'Referer' not in headers and sickrage.srCore.srConfig.PROXY_SETTING:
            sickrage.srCore.srLogger.debug("Using global proxy: " + sickrage.srCore.srConfig.PROXY_SETTING)
            scheme, address = urllib2.splittype(sickrage.srCore.srConfig.PROXY_SETTING)
            # NOTE(review): indexing this tuple with `scheme` (a str or None
            # from splittype) looks suspect — probably intended bool(scheme)
            # to pick the bare vs. schemed form; confirm before refactoring.
            address = ('http://{}'.format(sickrage.srCore.srConfig.PROXY_SETTING),
                       sickrage.srCore.srConfig.PROXY_SETTING)[scheme]
            proxies.update({"http": address, "https": address})
            headers.update({'Referer': address})

        # setup session caching
        if cache:
            cache_file = os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sessions.db'))
            # NOTE(review): replaces this *instance's class* with the
            # CacheControl wrapper's class so later calls go through the cache
            # — an unusual hack; verify behavior before changing.
            self.__class__ = cachecontrol.CacheControl(self,
                                                       cache=DBCache(cache_file),
                                                       heuristic=ExpiresAfter(days=7)).__class__

        # get web response
        response = super(srSession, self).request(method,
                                                  url,
                                                  headers=headers,
                                                  params=params,
                                                  verify=verify,
                                                  proxies=proxies,
                                                  *args, **kwargs)

        try:
            # check web response for errors
            response.raise_for_status()
        except requests.exceptions.SSLError as e:
            # Old OpenSSL builds are a known cause of SSL failures here.
            if ssl.OPENSSL_VERSION_INFO < (1, 0, 1, 5):
                sickrage.srCore.srLogger.info(
                    "SSL Error requesting url: '{}' You have {}, try upgrading OpenSSL to 1.0.1e+".format(
                        e.request.url, ssl.OPENSSL_VERSION))

            if sickrage.srCore.srConfig.SSL_VERIFY:
                sickrage.srCore.srLogger.info(
                    "SSL Error requesting url: '{}', try disabling cert verification in advanced settings".format(
                        e.request.url))
        except Exception:
            # Best-effort: other HTTP errors are returned to the caller as-is.
            pass

        return response
Exemple #10
0
 def __init__(self, verbose=True):
     """Create the client with a caching HTTP session and a fresh OAuth token.

     When *verbose* is true, the module logger is switched to DEBUG.
     """
     self.verbose = verbose
     if verbose:
         LOG.setLevel(logging.DEBUG)
     # Cache API responses across calls on the shared session.
     self.sess = cachecontrol.CacheControl(requests.Session())
     self.auth = (CLIENT_ID, CLIENT_SECRET)
     # Obtain an access token immediately and pre-build the auth header.
     self.access_token = self._refresh_token()
     self.auth_headers = {"Authorization": "Bearer " + self.access_token}
Exemple #11
0
    def request(self,
                method,
                url,
                headers=None,
                params=None,
                cache=True,
                raise_exceptions=True,
                *args,
                **kwargs):
        """Issue an HTTP request with SickRage defaults: gzip encoding, random
        User-Agent, configured SSL verification, global proxy, and caching.

        :param method: HTTP verb, passed to the underlying session.
        :param url: Target URL; normalized before use.
        :param headers: Optional extra headers, merged into kwargs.
        :param params: Optional query parameters, merged into kwargs.
        :param cache: When true, wrap this session with an on-disk DBCache
            (entries expire after 7 days).
        :param raise_exceptions: When true, raise for non-2xx responses.
        :return: The Response; note the super().request(...) return has
            .result() called on it, so the base session is futures-based.
        """
        url = self.normalize_url(url)
        kwargs.setdefault('params', {}).update(params or {})
        kwargs.setdefault('headers', {}).update(headers or {})

        # if method == 'POST':
        #    self.session.headers.update({"Content-type": "application/x-www-form-urlencoded"})
        kwargs.setdefault('headers',
                          {}).update({'Accept-Encoding': 'gzip, deflate'})
        # Rotate the User-Agent on every request.
        kwargs.setdefault('headers', {}).update(random.choice(USER_AGENTS))

        # request session ssl verify
        kwargs['verify'] = False
        if sickrage.srCore.srConfig.SSL_VERIFY:
            try:
                kwargs['verify'] = certifi.where()
            except:
                pass

        # request session proxies
        if 'Referer' not in kwargs.get(
                'headers', {}) and sickrage.srCore.srConfig.PROXY_SETTING:
            sickrage.srCore.srLogger.debug(
                "Using global proxy: " +
                sickrage.srCore.srConfig.PROXY_SETTING)
            scheme, address = urllib2.splittype(
                sickrage.srCore.srConfig.PROXY_SETTING)
            # NOTE(review): indexing this tuple with `scheme` (a str or None
            # from splittype) looks suspect — probably intended bool(scheme);
            # confirm before refactoring.
            address = \
            ('http://{}'.format(sickrage.srCore.srConfig.PROXY_SETTING), sickrage.srCore.srConfig.PROXY_SETTING)[scheme]
            kwargs.setdefault('proxies', {}).update({
                "http": address,
                "https": address
            })
            kwargs.setdefault('headers', {}).update({'Referer': address})

        # setup session caching
        if cache:
            cache_file = os.path.abspath(
                os.path.join(sickrage.DATA_DIR, 'sessions.db'))
            # CacheControl mutates the session it wraps; the wrapper's return
            # value is intentionally discarded here.
            cachecontrol.CacheControl(self,
                                      cache=DBCache(cache_file),
                                      heuristic=ExpiresAfter(days=7))

        # get result
        response = super(srSession, self).request(method, url, *args,
                                                  **kwargs).result()
        if raise_exceptions:
            response.raise_for_status()

        return response
Exemple #12
0
 def _get_requests_session(self, direct=False):
     """Return a requests session with the registered mock adapters mounted.

     Unless *direct* is set, responses are cached to the downloads cache dir.
     """
     if direct:
         sess = requests.Session()
     else:
         disk_cache = cachecontrol.caches.FileCache(path.downloads('cache'))
         sess = cachecontrol.CacheControl(requests.Session(), cache=disk_cache)
     # NOTE(review): _mocks is unpacked as (uri, kwargs) pairs here, while a
     # sibling variant iterates a dict via .items() — confirm container type.
     for uri, mock_kwargs in self._mocks:
         adapter = requests_mock.Adapter()
         adapter.register_uri('HEAD', uri, **mock_kwargs)
         adapter.register_uri('GET', uri, **mock_kwargs)
         sess.mount(uri, adapter)
     return sess
Exemple #13
0
    def __init__(self, tokens: List[str]):
        """
        :param tokens: A list of GitHub tokens.
        :raises TypeError: If *tokens* is not a list.
        :raises ValueError: If any token is not a string.
        """
        if not isinstance(tokens, list):
            raise TypeError('Tokens must be a list.')
        for candidate in tokens:
            if not isinstance(candidate, str):
                raise ValueError('All GitHub tokens must be given as strings.')

        self._tokens = deque(tokens)
        self._session = cachecontrol.CacheControl(requests.Session())
        # Start with the first token. We lazily switch tokens as each hits its quota limit.
        self._session.headers['Authorization'] = 'token %s' % self._tokens[0]
Exemple #14
0
 def __init__(self, app):
     """Build the JWT verifiers for Firebase ID tokens and session cookies."""
     # One cached HTTP session serves both verifiers' certificate fetches.
     cached_session = cachecontrol.CacheControl(requests.Session())
     self.request = transport.requests.Request(session=cached_session)
     self.id_token_verifier = _JWTVerifier(
         project_id=app.project_id,
         short_name='ID token',
         operation='verify_id_token()',
         doc_url='https://firebase.google.com/docs/auth/admin/verify-id-tokens',
         cert_url=ID_TOKEN_CERT_URI,
         issuer=ID_TOKEN_ISSUER_PREFIX)
     self.cookie_verifier = _JWTVerifier(
         project_id=app.project_id,
         short_name='session cookie',
         operation='verify_session_cookie()',
         doc_url='https://firebase.google.com/docs/auth/admin/verify-id-tokens',
         cert_url=COOKIE_CERT_URI,
         issuer=COOKIE_ISSUER_PREFIX)
Exemple #15
0
def get_session(output_dir, verbose=True):
    """Return a requests session, cache-backed when CacheControl is installed."""
    session = requests.Session()
    try:
        import cachecontrol
        import cachecontrol.caches
    except ImportError:
        # CacheControl is optional: fall back to an uncached session.
        if verbose:
            print("Tip: install CacheControl (conda package) to cache the CRAN metadata")
        return session
    cache_dir = join(output_dir, '.web_cache')
    return cachecontrol.CacheControl(
        session, cache=cachecontrol.caches.FileCache(cache_dir))
Exemple #16
0
    def __init__(self, session=True, priority=constants.Priority.Low):
        """Initialize the request, resolving which HTTP session it will use."""
        super().__init__(priority)
        if not _Request.default_session:
            # Lazily create the shared default session (restored from the
            # internal DB when available).
            _Request.default_session = constants.internaldb.network_session.get(
                cachecontrol.CacheControl(requests.Session()))
            self.default_session = _Request.default_session

        # Resolve the session: an explicit Session wins, True/None selects the
        # shared default, anything else falls back to the requests module API.
        if isinstance(session, requests.Session):
            self.session = session
        elif session in (True, None):
            self.session = self.default_session
        else:
            self.session = requests
Exemple #17
0
        def wrapper(*args, **kvargs):
            """Reject the call unless the request's 'idToken' header verifies
            as a Google-issued token (optionally for a specific email)."""
            token = request.headers.get('idToken')
            # Cached session so Google's signing certs aren't re-downloaded.
            cached_session = cachecontrol.CacheControl(requests.session())
            transport_request = google.auth.transport.requests.Request(
                session=cached_session)

            decoded_token = id_token.verify_oauth2_token(token, transport_request)
            if decoded_token['iss'] != 'accounts.google.com':
                raise AuthenticationFailedException()
            if email and decoded_token['email'] != email:
                raise AuthenticationFailedException()

            return f(*args, **kvargs)
Exemple #18
0
    def __init__(self):
        """Create a cached session and attach the first Travis CI token, if any."""
        self._session = cachecontrol.CacheControl(requests.Session())
        if not credentials.TRAVIS_TOKENS:
            return
        if not isinstance(credentials.TRAVIS_TOKENS, list):
            raise TypeError('TRAVIS_TOKENS must be a list.')
        for candidate in credentials.TRAVIS_TOKENS:
            if not isinstance(candidate, str):
                raise ValueError(
                    'All Travis CI Tokens must be given as strings.')

        # Use the first token, then rotate the deque so that with 2+ threads
        # each thread grabs the next token in the list.
        self._session.headers['Authorization'] = 'token {}'.format(_TOKENS[0])
        _TOKENS.appendleft(_TOKENS.pop())
Exemple #19
0
 def _get_requests_session(self, direct=False):
     """Return a requests session with the registered mock adapters mounted.

     Unless *direct* is set, responses are cached to the downloads cache dir
     (group-writeable permissions when the cache dir supports it).
     """
     if direct:
         sess = requests.Session()
     else:
         mode_kwargs = ({'filemode': 0o0660, 'dirmode': 0o0770}
                        if is_cache_group_writeable() else {})
         disk_cache = cachecontrol.caches.FileCache(path.downloads('cache'),
                                                    **mode_kwargs)
         sess = cachecontrol.CacheControl(requests.Session(), cache=disk_cache)
     for uri, mock_kwargs in self._mocks.items():
         adapter = requests_mock.Adapter()
         adapter.register_uri('HEAD', uri, **mock_kwargs)
         adapter.register_uri('GET', uri, **mock_kwargs)
         sess.mount(uri, adapter)
     return sess
def requests_session() -> requests.Session:
    """Creates a Requests-Cache session object."""

    global _session

    if _session is None:
        # First call: build the cached session and memoize it.
        cache_name = cache_directory('blender_cloud_http')
        log.info('Storing cache in %s' % cache_name)
        _session = cachecontrol.CacheControl(sess=requests.session(),
                                             cache=FileCache(cache_name))
    return _session
def get_http_session():
    """Return the shared HTTP session, creating and memoizing it on first use."""
    global _http_session

    if _http_session is not None:
        return _http_session

    _http_session = requests.session()
    if cachecontrol:
        # Persist responses forever on disk; entries refresh after 14 days.
        _http_session = cachecontrol.CacheControl(
            _http_session,
            cache=FileCache(
                user_cache_dir(__appname__, __appauthor__), forever=True
            ),
            heuristic=ExpiresAfter(days=14),
        )
    return _http_session
Exemple #22
0
def callback():
    """OAuth2 callback: verify Google's ID token, ensure a local User row
    exists, and log the user in."""
    flow.fetch_token(authorization_response=request.url)

    # CSRF guard: the state value must round-trip unchanged.
    if session["state"] != request.args["state"]:
        abort(500)  # State does not match

    credentials = flow.credentials
    # Cache Google's certificate fetches between verifications.
    cached_session = cachecontrol.CacheControl(requests.session())
    token_request = google.auth.transport.requests.Request(
        session=cached_session)

    id_info = id_token.verify_oauth2_token(id_token=credentials._id_token,
                                           request=token_request,
                                           audience=GOOGLE_CLIENT_ID)
    session["google_id"] = id_info.get("sub")
    session["name"] = id_info.get("name")
    session["email"] = id_info.get("email")
    session["picture"] = id_info.get("picture")

    # Create a user in our db with the information provided by Google
    # Doesn't exist? Add to database
    user = User.query.filter(User.google_id == session["google_id"]).first()

    if user is None:
        new_user = User(google_id=session["google_id"],
                        name=session["name"],
                        email=session["email"],
                        profile_pic=session["picture"])
        db.session.add(new_user)
        db.session.commit()
        # Re-fetch so we log in with the persisted row.
        user = User.query.filter(
            User.google_id == session["google_id"]).first()

    # g for globally accessing user session information
    g.user = user
    login_user(user)
    session['logged_in'] = True
    return redirect("/my-account")
Exemple #23
0
# Module-level memo so repeated calls reuse a single session. Replaces the
# mutable-default-argument cache hack (`cache=[]`), which is a well-known
# Python pitfall: the default list persisted across calls by accident of
# evaluation-at-definition rather than by explicit design.
_SESSION_CACHE = []


def get_session(output_dir, verbose=True, cache=None):
    """Return a (memoized) requests session for fetching CRAN metadata.

    Args:
        output_dir: Directory in which the '.web_cache' store is created.
        verbose: Print a tip when CacheControl is not installed.
        cache: Optional one-element list used as the memo; defaults to a
            module-level list so the session is created only once per process.

    Returns:
        A requests.Session, wrapped by CacheControl when available.
    """
    if cache is None:
        cache = _SESSION_CACHE
    if cache:
        return cache[0]
    session = requests.Session()
    try:
        import cachecontrol
        import cachecontrol.caches
    except ImportError:
        # CacheControl is optional: fall back to an uncached session.
        if verbose:
            print("Tip: install CacheControl to cache the CRAN metadata")
    else:
        session = cachecontrol.CacheControl(session,
            cache=cachecontrol.caches.FileCache(join(output_dir,
                '.web_cache')))

    cache.append(session)
    return session
Exemple #24
0
def github_api_ctor(
    github_url: str,
    verify_ssl: bool = True,
    session_adapter: SessionAdapter = SessionAdapter.RETRY,
):
    '''returns the appropriate github3.GitHub constructor for the given github URL

    In case github_url does not refer to github.com, the c'tor for GithubEnterprise is
    returned with the url argument preset, thus disburdening users to differentiate
    between github.com and non-github.com cases.
    '''
    parsed = urllib.parse.urlparse(github_url)
    if not parsed.scheme:
        raise ValueError('failed to parse url: ' + str(github_url))
    hostname = parsed.hostname

    session = github3.session.GitHubSession()
    adapter = SessionAdapter(session_adapter)
    if adapter is SessionAdapter.RETRY:
        session = http_requests.mount_default_adapter(session)
    elif adapter is SessionAdapter.CACHE:
        session = cachecontrol.CacheControl(session, cache_etags=True)
    elif adapter is not SessionAdapter.NONE:
        raise NotImplementedError

    if log_github_access:
        session.hooks['response'] = log_stack_trace_information_hook

    # github.com gets the plain GitHub c'tor; everything else is Enterprise
    # with the URL and SSL verification preset.
    if hostname.lower() == 'github.com':
        return functools.partial(github3.github.GitHub, session=session)
    return functools.partial(
        github3.github.GitHubEnterprise,
        url=github_url,
        verify=verify_ssl,
        session=session,
    )
Exemple #25
0
    def __init__(self, grace=1):
        """Configure HTML cleanup (html2text/tidylib), timezone and caches."""
        self._proxies = {'http': 'http://127.0.0.1:3128'}

        # html2text: plain-text output, no links/images, alt text only.
        self._html2text = html2text.HTML2Text()
        self._html2text.ignore_links = True
        self._html2text.ignore_images = True
        self._html2text.body_width = 78
        self._html2text.images_to_alt = True

        self._grace = grace
        # Python 2 hack: force UTF-8 as the process-wide default encoding.
        reload(sys)
        sys.setdefaultencoding('UTF-8')
        self._stockholm = pytz.timezone('Europe/Stockholm')

        # Global tidylib options applied to every document we clean.
        tidylib.BASE_OPTIONS.update({
            'bare': 1,
            'clean': 1,
            'drop-empty-paras': 1,
            'drop-font-tags': 1,
            'drop-proprietary-attributes': 1,
            'enclose-block-text': 1,
            'escape-cdata': 1,
            'hide-comments': 1,
            'logical-emphasis': 1,
            'output-xhtml': 1,
            'quote-nbsp': 1,
            'replace-color': 1,
            'uppercase-tags': 1,
            'break-before-br': 1,
            'indent': 1,
            'indent-attributes': 1,
            'indent-spaces': 1,
            'markup': 1,
            'punctuation-wrap': 1,
            'tab-size': 4,
            'vertical-space': 1,
            'wrap': 80,
            'wrap-script-literals': 1,
            'char-encoding': 'utf8',
        })

        self._articles = {}
        self._keywords = {}
        # HTTP fetches go through a cache-aware session.
        self._cached_sess = cachecontrol.CacheControl(requests.session())
def test_deletion_with_cache_control(cache, local_server):
    """Check that issuing a delete clears the entry from the cache"""
    app, port = local_server
    url = f"http://localhost:{port}/"

    proxy = CacheControlProxy(cache)
    cached_sesh = cachecontrol.CacheControl(requests.Session(), cache=proxy)

    # Prime the cache with a GET...
    get_resp = cached_sesh.get(url)
    assert get_resp.status_code == 200
    assert get_resp.content == b"hello"

    # ...then DELETE the same URL...
    del_resp = cached_sesh.delete(url)
    assert del_resp.status_code == 200
    assert del_resp.content == b"goodbye"

    # ...and the cached entry must be gone.
    assert proxy.get(url) is None
def test_request_with_cache_control(cache, local_server):
    """Check that requests are cached properly"""
    app, port = local_server
    initial_calls = app.config["calls"]
    url = f"http://localhost:{port}/"

    proxy = CacheControlProxy(cache)
    cached_sesh = cachecontrol.CacheControl(requests.Session(), cache=proxy)

    # Two identical GETs both succeed with the same body...
    for _ in range(2):
        resp = cached_sesh.get(url)
        assert resp.status_code == 200
        assert resp.content == b"hello"

    # ...but only the first one actually reaches the server.
    assert app.config["calls"] == (initial_calls + 1)
Exemple #28
0
    def __init__(
        self,
        project_id: Optional[Text] = None,
        hangouts_user_added_intent_name: Optional[Text] = "/user_added",
        hangouts_room_added_intent_name: Optional[Text] = "/room_added",
        hangouts_removed_intent_name: Optional[Text] = "/bot_removed",
    ) -> None:
        """Store the connector configuration and prepare a cached transport.

        Args:
            project_id: Google Cloud project used when validating tokens.
            hangouts_user_added_intent_name: Intent sent when a user is added.
            hangouts_room_added_intent_name: Intent sent when the bot joins a room.
            hangouts_removed_intent_name: Intent sent when the bot is removed.
        """
        self.project_id = project_id
        self.hangouts_user_added_intent_name = hangouts_user_added_intent_name
        self.hangouts_room_added_intent_name = hangouts_room_added_intent_name
        # BUG FIX: this line previously reassigned
        # `hangouts_user_added_intent_name` with the *removed* intent value,
        # clobbering it and leaving `hangouts_removed_intent_name` unset.
        self.hangouts_removed_intent_name = hangouts_removed_intent_name

        # Google's Request obj (this is used to make HTTP requests) uses cached
        # session to fetch Google's service certs. Certs don't change frequently,
        # so it makes sense to cache request body, rather than getting it again
        # every message. Actual caching depends on response headers.
        # see: https://github.com/googleapis/google-auth-library-python/blob/main/google/oauth2/id_token.py#L15 # noqa: E501, W505
        cached_session = cachecontrol.CacheControl(requests.session())
        self.google_request = google.auth.transport.requests.Request(
            session=cached_session
        )
Exemple #29
0
def download_courses(flush_count=1000):
    """Downloads all studyportal course data to the global DATA_PATH path,
    including metadata for convenience analysis of course data. The output
    directory structure under DATA_PATH/courses will look like:

    38                               <--- discipline #38
    ├── bachelor
    │   ├── 38-bachelor-1000.json    <--- the first 1000 BACHELOR courses for discipline #38
    │   └── 38-bachelor-2000.json    <--- up to the next 1000 BACHELOR courses for discipline #38
    ├── master
    │   ├── 38-master-1000.json      <--- the first 1000 MASTER courses for discipline #38
    │   └── 38-master-2000.json      <--- up to the next 1000 MASTER courses for discipline #38
    ├── phd
    │   └── 38-phd-1000.json         <--- up to the first 1000 PHD courses for discipline #38
    ├── preparation
    │   └── 38-preparation-1000.json <--- up to the first 1000 PREPARATION courses for discipline #38
    └── short
        └── 38-short-1000.json       <--- up to the first 1000 SHORT courses for discipline #38


    Important/interesting notes:
    1) Courses can have multiple disciplines. The average number of disciplines per course is 1.79.
    2) There are over 170k courses, and over 200 disciplines.
    3) The same discipline ontology is used for all course levels (Bachelor, PhD, etc)
    4) The discipline ontology is nested at 2 levels.

    There are three metadata files to help you navigate the data:

    1) discipline_dictionary.json: Metadata for disciplines, including disclipline name and parent name
    2) course_discipline_lookup.json: Look-up table of course_id --> [discipline ids]
    3) discipline_course_lookup.json: Look-up table of discipline_id --> [course ids]

    Args:
        flush_count (int): Maximum number of courses in any file saved to disk.

    Raises:
        OSError: If DATA_PATH does not exist.
    """
    if not os.path.exists(DATA_PATH):
        raise OSError(f'Output path {DATA_PATH} does not exist')

    session = cachecontrol.CacheControl(requests.Session())
    disciplines = discover_disciplines(session, BACHELOR_DISCIPLINES_URL)

    # Output containers
    courses = defaultdict(list)  # Flushable course container
    course_count = defaultdict(int)  # Count of all courses, for book-keeping
    course_discipline_lookup = defaultdict(
        SortedList)  # Course-discipline look-up
    discipline_course_lookup = defaultdict(
        SortedList)  # ...and the reverse look-up
    for (di, lvl), course in discover_courses(session, disciplines):
        key = (di, lvl)
        ci = course['id']
        courses[key].append(course)
        course_discipline_lookup[ci].add(di)
        discipline_course_lookup[di].add(ci)
        # Flush if threshold count is reached
        if len(courses[key]) == flush_count:
            course_count[key] += flush_count
            flush(courses, di, lvl, course_count[key])
            del courses[key]  # Now we've flushed, free up some memory
    # Flush remaining collections of courses that never went over flush threshold.
    # BUG FIX: iterate over a snapshot of the keys — deleting entries while
    # iterating the dict itself raises "dictionary changed size during
    # iteration" whenever any courses remain unflushed.
    for key in list(courses):
        di, lvl = key
        flush(courses, di, lvl, course_count[key] + flush_count)
        del courses[key]  # Not really necessary, but not unnecessary

    # Reformat defaultdict(SortedList) ready for json serialization
    course_discipline_lookup = standardise_ddsl(course_discipline_lookup)
    discipline_course_lookup = standardise_ddsl(discipline_course_lookup)

    # Save the handy lookup tables
    write_json(course_discipline_lookup,
               f'{DATA_PATH}/course_discipline_lookup.json')
    write_json(discipline_course_lookup,
               f'{DATA_PATH}/discipline_course_lookup.json')
    write_json(disciplines, f'{DATA_PATH}/discipline_dictionary.json')
Exemple #30
0
                 Column('itemName', VARCHAR(length=200), nullable=False),
                 schema=schema)

# Accumulator for NPC corporation data (appended elsewhere in the script).
maincorplist = []

# ESI endpoint listing all NPC corporation ids.
corpurl = "https://esi.evetech.net/latest/corporations/npccorps/?datasource=tranquility"

# Per-corporation lookup endpoint; format with a corporation id.
corplookupurl = 'https://esi.evetech.net/latest/corporations/{}/?datasource=tranquility'

errorcount = 0

# Redis backs the shared HTTP response cache used by all request workers.
redis_connection = redis.Redis(host=redis_server,
                               db=redis_db,
                               retry_on_timeout=True,
                               health_check_interval=30)
base_session = cachecontrol.CacheControl(
    requests.session(),
    cachecontrol.caches.redis_cache.RedisCache(redis_connection))

# Number of concurrent request workers.
reqs_num_workers = 50

# Futures-based session so corporation lookups run concurrently.
session = FuturesSession(max_workers=reqs_num_workers, session=base_session)

# Fetch the full NPC corporation id list (direct, uncached request).
corps = requests.get(corpurl).json()

# Two passes: getcorps returns the ids that failed, which are retried once.
firstbadlist = getcorps(corps)
print "Getting badlist"
secondbadlist = getcorps(firstbadlist)

# Persist whatever the script wrote within the surrounding transaction.
trans.commit()