async def fetch_url(url):
    async with aiohttp.ClientSession(
            headers=headers,
            connector=aiohttp.TCPConnector(ssl=False)) as session:
        async with session.get(url) as response:
            return await response.text()
Example #2
def test_dont_recreate_ssl_context(loop):
    conn = aiohttp.TCPConnector(loop=loop)
    ctx = conn.ssl_context
    assert ctx is conn.ssl_context
Example #3
    def session(self) -> aiohttp.ClientSession:
        connector = aiohttp.TCPConnector(ssl=self._ssl)
        self._session = self._session or aiohttp.ClientSession(
            connector=connector, timeout=self._timeout)
        return self._session
Example #4
def test_tcp_connector_ctor_fingerprint_valid(loop):
    valid = b'\xa2\x06G\xad\xaa\xf5\xd8\\J\x99^by;\x06='
    conn = aiohttp.TCPConnector(loop=loop, fingerprint=valid)
    assert conn.fingerprint == valid
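In current aiohttp releases the fingerprint keyword used above has been replaced by aiohttp.Fingerprint, which expects a SHA-256 digest and is passed through the ssl parameter. A minimal sketch under that assumption, with a hypothetical digest value:

import hashlib
import aiohttp

async def pinned_session():
    # SHA-256 digest of the server's DER-encoded certificate (hypothetical value).
    digest = hashlib.sha256(b'server-cert-der-bytes').digest()
    conn = aiohttp.TCPConnector(ssl=aiohttp.Fingerprint(digest))
    return aiohttp.ClientSession(connector=conn)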
Example #5
def test_tcp_connector_clear_dns_cache_bad_args(loop):
    conn = aiohttp.TCPConnector(loop=loop)
    with pytest.raises(ValueError):
        conn.clear_dns_cache('localhost')
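clear_dns_cache accepts either no arguments (flush the whole cache) or both a host and a port, which is why the single-argument call above raises ValueError. A minimal sketch of the valid calls:

import aiohttp

async def flush_dns():
    conn = aiohttp.TCPConnector()
    conn.clear_dns_cache()                   # drop the whole cache
    conn.clear_dns_cache('localhost', 8080)  # drop a single host/port entry
    await conn.close()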
Example #6
    async def on_connect(self):
        await cleanup()
        global session
        connector = aiohttp.TCPConnector(limit=60)
        session = aiohttp.ClientSession(connector=connector)
Example #7
def request(method, url, *,
            params=None,
            data=None,
            headers=None,
            skip_auto_headers=None,
            cookies=None,
            auth=None,
            allow_redirects=True,
            max_redirects=10,
            encoding='utf-8',
            version=None,
            compress=None,
            chunked=None,
            expect100=False,
            connector=None,
            loop=None,
            read_until_eof=True,
            request_class=None,
            response_class=None,
            proxy=None,
            proxy_auth=None):
    """Constructs and sends a request. Returns response object.

    method - HTTP method
    url - request url
    params - (optional) Dictionary or bytes to be sent in the query
      string of the new request
    data - (optional) Dictionary, bytes, or file-like object to
      send in the body of the request
    headers - (optional) Dictionary of HTTP Headers to send with
      the request
    cookies - (optional) Dict object to send with the request
    auth - (optional) aiohttp.helpers.BasicAuth named tuple representing
      HTTP Basic Auth
    allow_redirects - (optional) If set to False, do not follow
      redirects
    version - Request HTTP version.
    compress - Set to True if request has to be compressed
       with deflate encoding.
    chunked - Set to chunk size for chunked transfer encoding.
    expect100 - Expect 100-continue response from server.
    connector - BaseConnector sub-class instance to support
       connection pooling.
    read_until_eof - Read response until eof if response
       does not have Content-Length header.
    request_class - (optional) Custom Request class implementation.
    response_class - (optional) Custom Response class implementation.
    loop - Optional event loop.

    Usage::

      >>> import aiohttp
      >>> resp = yield from aiohttp.request('GET', 'http://python.org/')
      >>> resp
      <ClientResponse(python.org/) [200]>
      >>> data = yield from resp.read()

    """
    warnings.warn("Use ClientSession().request() instead", DeprecationWarning)
    if connector is None:
        connector = aiohttp.TCPConnector(loop=loop, force_close=True)

    kwargs = {}

    if request_class is not None:
        kwargs['request_class'] = request_class

    if response_class is not None:
        kwargs['response_class'] = response_class

    session = ClientSession(loop=loop,
                            cookies=cookies,
                            connector=connector,
                            **kwargs)
    return _DetachedRequestContextManager(
        session._request(method, url,
                         params=params,
                         data=data,
                         headers=headers,
                         skip_auto_headers=skip_auto_headers,
                         auth=auth,
                         allow_redirects=allow_redirects,
                         max_redirects=max_redirects,
                         encoding=encoding,
                         version=version,
                         compress=compress,
                         chunked=chunked,
                         expect100=expect100,
                         read_until_eof=read_until_eof,
                         proxy=proxy,
                         proxy_auth=proxy_auth,),
        session=session)
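The helper above is deprecated in favour of ClientSession, as its own warning says. A minimal sketch of the recommended replacement, assuming aiohttp 3.x:

import asyncio
import aiohttp

async def fetch(url):
    # force_close=True mirrors the behaviour of the deprecated helper above:
    # every request gets a fresh connection instead of a pooled one.
    connector = aiohttp.TCPConnector(force_close=True)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get(url) as resp:
            return await resp.text()

# asyncio.run(fetch('http://python.org/'))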
Example #8
    def __init__(self, endpoint, *, loop):
        self._endpoint = endpoint
        self._session = aiohttp.ClientSession(
            connector=aiohttp.TCPConnector(use_dns_cache=True, loop=loop),
            loop=loop)
        self._base_url = 'http://{0.host}:{0.port}/'.format(endpoint)
Example #9
async def init():
    bot.session = aiohttp.ClientSession(connector=aiohttp.TCPConnector(
        family=socket.AF_INET))
Example #10
def request(method, url, *,
            params=None,
            data=None,
            headers=None,
            cookies=None,
            files=None,
            auth=None,
            allow_redirects=True,
            max_redirects=10,
            encoding='utf-8',
            version=aiohttp.HttpVersion11,
            compress=None,
            chunked=None,
            expect100=False,
            connector=None,
            loop=None,
            read_until_eof=True,
            request_class=None,
            response_class=None):
    """Constructs and sends a request. Returns response object.

    :param str method: http method
    :param str url: request url
    :param params: (optional) Dictionary or bytes to be sent in the query
      string of the new request
    :param data: (optional) Dictionary, bytes, or file-like object to
      send in the body of the request
    :param dict headers: (optional) Dictionary of HTTP Headers to send with
      the request
    :param dict cookies: (optional) Dict object to send with the request
    :param auth: (optional) BasicAuth named tuple represent HTTP Basic Auth
    :type auth: aiohttp.helpers.BasicAuth
    :param bool allow_redirects: (optional) Set to True if POST/PUT/DELETE
       redirect following is allowed.
    :param version: Request http version.
    :type version: aiohttp.protocol.HttpVersion
    :param bool compress: Set to True if request has to be compressed
       with deflate encoding.
    :param chunked: Set to chunk size for chunked transfer encoding.
    :type chunked: bool or int
    :param bool expect100: Expect 100-continue response from server.
    :param connector: BaseConnector sub-class instance to support
       connection pooling and session cookies.
    :type connector: aiohttp.connector.BaseConnector
    :param bool read_until_eof: Read response until eof if response
       does not have Content-Length header.
    :param request_class: (optional) Custom Request class implementation.
    :param response_class: (optional) Custom Response class implementation.
    :param loop: Optional event loop.

    Usage::

      >>> import aiohttp
      >>> resp = yield from aiohttp.request('GET', 'http://python.org/')
      >>> resp
      <ClientResponse(python.org/) [200]>
      >>> data = yield from resp.read()

    """
    redirects = 0
    method = method.upper()
    if loop is None:
        loop = asyncio.get_event_loop()
    if request_class is None:
        request_class = ClientRequest
    if connector is None:
        connector = aiohttp.TCPConnector(force_close=True, loop=loop)

    while True:
        req = request_class(
            method, url, params=params, headers=headers, data=data,
            cookies=cookies, files=files, encoding=encoding,
            auth=auth, version=version, compress=compress, chunked=chunked,
            loop=loop, expect100=expect100, response_class=response_class)

        try:
            conn = yield from connector.connect(req)

            resp = req.send(conn.writer, conn.reader)
            try:
                yield from resp.start(conn, read_until_eof)
            except:
                resp.close()
                conn.close()
                raise
        except aiohttp.BadStatusLine as exc:
            raise aiohttp.ClientConnectionError(exc)
        except OSError as exc:
            raise aiohttp.OsConnectionError(exc)

        # redirects
        if resp.status in (301, 302, 303, 307) and allow_redirects:
            redirects += 1
            if max_redirects and redirects >= max_redirects:
                resp.close(force=True)
                break

            # For 301 and 302, mimic IE behaviour, now changed in RFC.
            # Details: https://github.com/kennethreitz/requests/pull/269
            if resp.status != 307:
                method = 'GET'
                data = None
            cookies = resp.cookies

            r_url = resp.headers.get('LOCATION') or resp.headers.get('URI')

            scheme = urllib.parse.urlsplit(r_url)[0]
            if scheme not in ('http', 'https', ''):
                resp.close(force=True)
                raise ValueError('Can redirect only to http or https')
            elif not scheme:
                r_url = urllib.parse.urljoin(url, r_url)

            url = urllib.parse.urldefrag(r_url)[0]
            if url:
                yield from asyncio.async(resp.release(), loop=loop)
                continue

        break

    return resp
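The manual redirect loop above predates ClientSession, which now applies the same policy through the allow_redirects and max_redirects arguments. A minimal sketch under that assumption:

import aiohttp

async def fetch_following_redirects(url):
    async with aiohttp.ClientSession() as session:
        # The session follows up to max_redirects hops itself and raises
        # aiohttp.TooManyRedirects once the budget is exhausted.
        async with session.get(url, allow_redirects=True, max_redirects=10) as resp:
            return resp.status, str(resp.url)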
Example #11
    def get_outgoing_mappings(self, cfg):
        """Reads the outgoing webhook definitions from the config file.

        This also sets up the HTTP client session for each webhook."""
        bridges = cfg['bridges']
        for bridge in bridges:
            if 'outgoing_webhooks' not in bridge:
                # No outgoing webhooks in this bridge.
                continue

            outgoing_webhooks = bridge['outgoing_webhooks']
            xmpp_endpoints = bridge['xmpp_endpoints']

            # Check whether all normal messages to this bridge should be
            # relayed.
            relay_all_normal = False
            for xmpp_endpoint in xmpp_endpoints:
                if ('relay_all_normal' in xmpp_endpoint
                        and xmpp_endpoint['relay_all_normal'] is True):
                    relay_all_normal = True
                    break

            for outgoing_webhook in outgoing_webhooks:
                if 'url' not in outgoing_webhook:
                    raise InvalidConfigError("Error in config file: "
                                             "'url' is missing from an "
                                             "outgoing webhook definition.")

                # Set up SSL context for certificate pinning.
                if 'cafile' in outgoing_webhook:
                    cafile = os.path.abspath(outgoing_webhook['cafile'])
                    sslcontext = ssl.create_default_context(cafile=cafile)
                    conn = aiohttp.TCPConnector(ssl_context=sslcontext)
                    session = aiohttp.ClientSession(loop=self.loop,
                                                    connector=conn)
                else:
                    session = aiohttp.ClientSession(loop=self.loop)
                # TODO: Handle ConnectionRefusedError.
                outgoing_webhook['session'] = session

                if relay_all_normal:
                    self.outgoing_mappings['all_normal'].append(
                        outgoing_webhook)

                for xmpp_endpoint in xmpp_endpoints:
                    # Determine whether the JID corresponds to a MUC or a
                    # normal chat:
                    if 'muc' in xmpp_endpoint:
                        if xmpp_endpoint['muc'] not in self.mucs:
                            raise InvalidConfigError(
                                "Error in config file: XMPP MUC '{}' was not "
                                "defined in the xmpp.mucs section.".format(
                                    xmpp_endpoint['muc']))

                        self.outgoing_mappings[xmpp_endpoint['muc']].append(
                            outgoing_webhook)
                    elif 'normal' in xmpp_endpoint:
                        if relay_all_normal:
                            # Don't add normal JIDs when all normal messages
                            # are relayed anyways.
                            continue
                        self.outgoing_mappings[xmpp_endpoint['normal']].append(
                            outgoing_webhook)
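Newer aiohttp releases deprecate the ssl_context keyword used above in favour of a single ssl parameter. A minimal sketch of the same CA-pinning setup under that assumption:

import ssl
import aiohttp

async def make_pinned_session(cafile):
    # Trust only the given CA bundle instead of the system trust store.
    sslcontext = ssl.create_default_context(cafile=cafile)
    conn = aiohttp.TCPConnector(ssl=sslcontext)
    return aiohttp.ClientSession(connector=conn)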
Example #12
async def get(url, params=None, headers=None):
    connector = aiohttp.TCPConnector(verify_ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get(url, params=params, headers=headers) as resp:
            return await resp.text()
Example #13
async def get(url):
    async with aiohttp.ClientSession(
        connector=aiohttp.TCPConnector(ssl=False)) as session:
        async with session.get(url) as res:
            return await res.text()
Example #14
def default_session():
    connector = aiohttp.TCPConnector(limit=None, verify_ssl=False)
    session = aiohttp.ClientSession(connector=connector)
    return session
Example #15
def _craft_aiohttp_connector(context):
    return aiohttp.TCPConnector()
Example #16
async def main(signals=None, site_settings=None):
    template_path = TemplatePathSettings().template_path
    if site_settings is None:
        site_settings = SiteSettings()
    if not site_settings.check_if_valid():
        logger.critical("Settings are not correctly configured. "
                        "Please run 'python main.py --help' for more info. "
                        "Exiting...")
        return

    ssl_context = ssl.create_default_context(cafile=certifi.where())
    conn = aiohttp.TCPConnector(ssl=ssl_context,
                                limit=site_settings.conn_limit,
                                limit_per_host=site_settings.conn_limit_per_host)

    async with monitor.MonitorSession(signals=signals, raise_for_status=True, connector=conn,
                                      timeout=aiohttp.ClientTimeout(30)) as session:
        logger.debug(f"Loading template: {template_path}")
        queue = unique_queue.UniqueQueue()
        producers = []
        cancellable_pool = CancellablePool()
        template_file = os.path.join(os.path.dirname(__file__), template_path)
        template = template_parser.Template(path=template_file, signals=signals)
        try:
            template.load()
        except Exception as e:
            logger.critical(f"A critical error occurred while passing the template."
                            f" {type(e).__name__}: {e}. Exiting...", exc_info=True)
            return

        await template.run_root(producers,
                                session,
                                queue,
                                site_settings=site_settings,
                                cancellable_pool=cancellable_pool)

        user_statistic = asyncio.ensure_future(async_user_statistics(session, site_settings.username))

        logger.debug(f"Checking for update")
        latest_version = await async_get_latest_version(session)
        if latest_version != VERSION:
            logger.info(f"A new update is available. Update with 'git pull'."
                        f" New version: {latest_version}. Current version {VERSION}")

        logger.debug("Starting consumers")
        consumers = [asyncio.ensure_future(downloader.download_files(session, queue)) for _ in range(20)]

        logger.debug("Gathering producers")
        await asyncio.gather(*producers)

        logger.debug("Waiting for queue")

        num_unfinished_downloads = queue.qsize() + queue._unfinished_tasks
        if num_unfinished_downloads:
            logger.info(f"Waiting for {num_unfinished_downloads} potential download(s) to finish")
        await queue.join()

        logger.debug("Cancel consumers")
        for c in consumers:
            c.cancel()

        cancellable_pool.shutdown()

        await user_statistic
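Example #16 throttles concurrency at the connector rather than per request and bounds each request with ClientTimeout. A minimal sketch of that pattern, with assumed limit values:

import aiohttp

async def make_limited_session():
    # At most 30 connections overall and 10 per host; each request aborts after 30 seconds total.
    conn = aiohttp.TCPConnector(limit=30, limit_per_host=10)
    timeout = aiohttp.ClientTimeout(total=30)
    return aiohttp.ClientSession(connector=conn, timeout=timeout)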
Example #17
    def __init__(self, ad, name, logger, error, loglevel, args):

        self.AD = ad
        self.logger = logger
        self.error = error
        self.stopping = False
        self.config = args
        self.loglevel = loglevel
        self.ws = None
        self.reading_messages = False
        self.name = name

        self.log("INFO", "HASS Plugin Initializing")

        self.name = name

        if "namespace" in args:
            self.namespace = args["namespace"]
        else:
            self.namespace = "default"

        if "verbose" in args:
            self.verbose = args["verbose"]
        else:
            self.verbose = False

        if "ha_key" in args:
            self.ha_key = args["ha_key"]
        else:
            self.ha_key = ""

        if "ha_url" in args:
            self.ha_url = args["ha_url"]
        else:
            self.log("WARN", "ha_url not found in HASS configuration - module not initialized")

        if "cert_path" in args:
            self.cert_path = args["cert_path"]
        else:
            self.cert_path = None

        if "timeout" in args:
            self.timeout = args["timeout"]
        else:
            self.timeout = None

        if "cert_verify" in args:
            self.cert_verify = args["cert_verify"]
        else:
            self.cert_verify = True

        if "commtype" in args:
            self.commtype = args["commtype"]
        else:
            self.commtype = "WS"

        #
        # Set up HTTP Client
        #
        conn = aiohttp.TCPConnector()
        self.session = aiohttp.ClientSession(connector=conn)

        self.log("INFO", "HASS Plugin initialization complete")
Example #18
    def init_session(self, loop):
        connector = aiohttp.TCPConnector(ttl_dns_cache=300)
        self._session = aiohttp.ClientSession(connector=connector, loop=loop)
        return self._session
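Examples #8 and #18 both tune DNS caching on the connector. A minimal sketch combining the two knobs, assuming aiohttp 3.x defaults otherwise:

import aiohttp

async def make_dns_cached_session():
    # Resolved hosts are cached for 5 minutes; use_dns_cache=False disables caching entirely.
    conn = aiohttp.TCPConnector(use_dns_cache=True, ttl_dns_cache=300)
    return aiohttp.ClientSession(connector=conn)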
Example #19
    def __init__(self, *, connector=None, loop=None, cookies=None,
                 headers=None, skip_auto_headers=None,
                 auth=None, request_class=ClientRequest,
                 response_class=ClientResponse,
                 ws_response_class=ClientWebSocketResponse,
                 version=aiohttp.HttpVersion11,
                 cookie_jar=None, read_timeout=None, time_service=None):

        implicit_loop = False
        if loop is None:
            if connector is not None:
                loop = connector._loop
            else:
                implicit_loop = True
                loop = asyncio.get_event_loop()

        if connector is None:
            connector = aiohttp.TCPConnector(loop=loop)

        if connector._loop is not loop:
            raise RuntimeError(
                "Session and connector has to use same event loop")

        self._loop = loop

        if loop.get_debug():
            self._source_traceback = traceback.extract_stack(sys._getframe(1))

        if implicit_loop and not loop.is_running():
            warnings.warn("Creating a client session outside of coroutine is "
                          "a very dangerous idea", ResourceWarning,
                          stacklevel=2)
            context = {'client_session': self,
                       'message': 'Creating a client session outside '
                       'of coroutine'}
            if self._source_traceback is not None:
                context['source_traceback'] = self._source_traceback
            loop.call_exception_handler(context)

        if cookie_jar is None:
            cookie_jar = CookieJar(loop=loop)
        self._cookie_jar = cookie_jar

        if cookies is not None:
            self._cookie_jar.update_cookies(cookies)
        self._connector = connector
        self._default_auth = auth
        self._version = version
        self._read_timeout = read_timeout

        # Convert to list of tuples
        if headers:
            headers = CIMultiDict(headers)
        else:
            headers = CIMultiDict()
        self._default_headers = headers
        if skip_auto_headers is not None:
            self._skip_auto_headers = frozenset([istr(i)
                                                 for i in skip_auto_headers])
        else:
            self._skip_auto_headers = frozenset()

        self._request_class = request_class
        self._response_class = response_class
        self._ws_response_class = ws_response_class

        if time_service is not None:
            self._time_service_owner = False
            self._time_service = time_service
        else:
            self._time_service_owner = True
            self._time_service = TimeService(self._loop)
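The constructor above warns when a session is created outside of a coroutine. A minimal sketch of the safe pattern, assuming a modern asyncio entry point:

import asyncio
import aiohttp

async def main():
    # Creating the session inside a running loop avoids the ResourceWarning above.
    async with aiohttp.ClientSession() as session:
        async with session.get('http://python.org/') as resp:
            print(resp.status)

asyncio.run(main())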
Example #20
    async def connect(self):
        if self.session is None:
            self.session = aiohttp.ClientSession(
                connector=aiohttp.TCPConnector(ssl=False))
Example #21
def test_default_use_dns_cache(loop):
    conn = aiohttp.TCPConnector(loop=loop)
    assert conn.use_dns_cache
Example #22
    def request_timeout(self, timeout):
        self._request_timeout = timeout
        self.aiohttp_clientsession = aiohttp.ClientSession(
            connector=aiohttp.TCPConnector(limit=self.request_batch_size * 2),
            timeout=aiohttp.ClientTimeout(total=timeout))
Example #23
def test_tcp_connector_fingerprint_invalid(loop):
    invalid = b'\x00'
    with pytest.raises(ValueError):
        aiohttp.TCPConnector(loop=loop, fingerprint=invalid)
Example #24
import asyncio
import aiohttp
import async_timeout
import atexit
import re
import json
from .. import exception
from ..api import _methodurl, _which_pool, _fileurl, _guess_filename

_loop = asyncio.get_event_loop()

_pools = {
    'default':
    aiohttp.ClientSession(connector=aiohttp.TCPConnector(limit=10), loop=_loop)
}

_timeout = 30
_proxy = None  # (url, (username, password))


def set_proxy(url, basic_auth=None):
    global _proxy
    if not url:
        _proxy = None
    else:
        _proxy = (url, basic_auth) if basic_auth else (url, )


def _close_pools():
    global _pools
    for s in _pools.values():
        s.close()
Example #25
def test_ambigous_verify_ssl_and_ssl_context(loop):
    with pytest.raises(ValueError):
        aiohttp.TCPConnector(verify_ssl=False,
                             ssl_context=ssl.SSLContext(ssl.PROTOCOL_SSLv23),
                             loop=loop)
Example #26
def _create_onetime_pool():
    return aiohttp.ClientSession(connector=aiohttp.TCPConnector(
        limit=1, force_close=True),
                                 loop=_loop)
Example #27
def test_respect_precreated_ssl_context(loop):
    ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    conn = aiohttp.TCPConnector(loop=loop, ssl_context=ctx)
    assert ctx is conn.ssl_context
Example #28
    def __init__(
        self,
        info: credentials.ConnectionInfo,
    ) -> None:
        super().__init__()

        # Some SSL data are not accepted directly, so we have to use temp files.
        tempfiles = _TempFiles()
        ca_path: Optional[str]
        certificate_path: Optional[str]
        private_key_path: Optional[str]

        if info.ca_path and info.ca_data:
            raise credentials.LoginError(
                "Both CA path & data are set. Need only one.")
        elif info.ca_path:
            ca_path = info.ca_path
        elif info.ca_data:
            ca_path = tempfiles[base64.b64decode(info.ca_data)]
        else:
            ca_path = None

        if info.certificate_path and info.certificate_data:
            raise credentials.LoginError(
                "Both certificate path & data are set. Need only one.")
        elif info.certificate_path:
            certificate_path = info.certificate_path
        elif info.certificate_data:
            certificate_path = tempfiles[base64.b64decode(
                info.certificate_data)]
        else:
            certificate_path = None

        if info.private_key_path and info.private_key_data:
            raise credentials.LoginError(
                "Both private key path & data are set. Need only one.")
        elif info.private_key_path:
            private_key_path = info.private_key_path
        elif info.private_key_data:
            private_key_path = tempfiles[base64.b64decode(
                info.private_key_data)]
        else:
            private_key_path = None

        # The SSL part (both client certificate auth and CA verification).
        context: ssl.SSLContext
        if certificate_path and private_key_path:
            context = ssl.create_default_context(
                purpose=ssl.Purpose.CLIENT_AUTH, cafile=ca_path)
            context.load_cert_chain(certfile=certificate_path,
                                    keyfile=private_key_path)
        else:
            context = ssl.create_default_context(cafile=ca_path)

        if info.insecure:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE

        # The token auth part.
        headers: Dict[str, str] = {}
        if info.scheme and info.token:
            headers['Authorization'] = f'{info.scheme} {info.token}'
        elif info.scheme:
            headers['Authorization'] = f'{info.scheme}'
        elif info.token:
            headers['Authorization'] = f'Bearer {info.token}'

        # The basic auth part.
        auth: Optional[aiohttp.BasicAuth]
        if info.username and info.password:
            auth = aiohttp.BasicAuth(info.username, info.password)
        else:
            auth = None

        # It is a good practice to self-identify a bit.
        headers['User-Agent'] = f'kopf/unknown'  # TODO: add version someday

        # Generic aiohttp session based on the constructed credentials.
        self.session = aiohttp.ClientSession(
            connector=aiohttp.TCPConnector(
                limit=0,
                ssl=context,
            ),
            headers=headers,
            auth=auth,
        )

        # Add the extra payload information. We avoid overriding the constructor.
        self.server = info.server
        self.default_namespace = info.default_namespace

        # For purging on garbage collection.
        self._tempfiles = tempfiles
        self._discovery_lock = asyncio.Lock()
        self._discovered_resources = {}
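The example above combines CA verification with client-certificate authentication before handing the context to TCPConnector. A minimal sketch of that mutual-TLS setup, with hypothetical file paths passed in by the caller:

import ssl
import aiohttp

async def make_mtls_session(cafile, certfile, keyfile):
    # Verify the server against a pinned CA and present a client certificate (mutual TLS).
    context = ssl.create_default_context(cafile=cafile)
    context.load_cert_chain(certfile=certfile, keyfile=keyfile)
    # limit=0 removes the connection-pool cap, as in the example above.
    return aiohttp.ClientSession(connector=aiohttp.TCPConnector(limit=0, ssl=context))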
Example #29
async def maigret(username, site_dict, query_notify, logger,
                  proxy=None, timeout=None, recursive_search=False,
                  id_type='username', tags=None, debug=False, forced=False,
                  max_connections=100, no_progressbar=False):
    """Main search func

    Checks for existence of username on various social media sites.

    Keyword Arguments:
    username               -- String indicating username that report
                              should be created against.
    site_dict              -- Dictionary containing all of the site data.
    query_notify           -- Object with base type of QueryNotify().
                              This will be used to notify the caller about
                              query results.
    proxy                  -- String indicating the proxy URL
    timeout                -- Time in seconds to wait before timing out request.
                              Default is no timeout.
    recursive_search       -- Search for other usernames in website pages & recursive search by them.

    Return Value:
    Dictionary containing results from report. Key of dictionary is the name
    of the social network site, and the value is another dictionary with
    the following keys:
        url_main:      URL of main site.
        url_user:      URL of user on site (if account exists).
        status:        QueryResult() object indicating results of test for
                       account existence.
        http_status:   HTTP status code of query which checked for existence on
                       site.
        response_text: Text that came back from request.  May be None if
                       there was an HTTP error when checking for existence.
    """

    # Notify caller that we are starting the query.
    if tags is None:
        tags = set()
    query_notify.start(username, id_type)

    # TODO: connector
    connector = ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
    # connector = aiohttp.TCPConnector(ssl=False)
    connector.verify_ssl = False
    session = aiohttp.ClientSession(connector=connector)

    if logger.level == logging.DEBUG:
        future = session.get(url='https://icanhazip.com')
        ip, status, error, exception = await get_response(future, None, logger)
        if ip:
            logger.debug(f'My IP is: {ip.strip()}')
        else:
            logger.debug(f'IP requesting {error}: {exception}')


    # Results from analysis of all sites
    results_total = {}

    # First create futures for all requests. This allows for the requests to run in parallel
    for site_name, site in site_dict.items():

        fulltags = site.tags

        if site.type != id_type:
            continue

        site_tags = set(fulltags)
        if tags:
            if not set(tags).intersection(site_tags):
                continue

        if site.disabled and not forced:
            continue

        # Results from analysis of this specific site
        results_site = {}

        # Record URL of main site and username
        results_site['username'] = username
        results_site['parsing_enabled'] = recursive_search
        results_site['url_main'] = site.url_main

        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
        }

        headers.update(site.headers)

        if 'url' not in site.__dict__:
            logger.error('No URL for site %s', site.name)
        # URL of user on site (if it exists)
        url = site.url.format(
            urlMain=site.url_main,
            urlSubpath=site.url_subpath,
            username=username
        )
        # workaround to prevent slash errors
        url = re.sub('(?<!:)/+', '/', url)

        # Don't make request if username is invalid for the site
        if site.regex_check and re.search(site.regex_check, username) is None:
            # No need to do the check at the site: this user name is not allowed.
            results_site['status'] = QueryResult(username,
                                                 site_name,
                                                 url,
                                                 QueryStatus.ILLEGAL)
            results_site["url_user"] = ""
            results_site['http_status'] = ""
            results_site['response_text'] = ""
            query_notify.update(results_site['status'])
        else:
            # URL of user on site (if it exists)
            results_site["url_user"] = url
            url_probe = site.url_probe
            if url_probe is None:
                # Probe URL is normal one seen by people out on the web.
                url_probe = url
            else:
                # There is a special URL for probing existence separate
                # from where the user profile normally can be found.
                url_probe = url_probe.format(
                    urlMain=site.url_main,
                    urlSubpath=site.url_subpath,
                    username=username,
                )


            if site.check_type == 'status_code' and site.request_head_only:
                # In most cases when we are detecting by status code,
                # it is not necessary to get the entire body:  we can
                # detect fine with just the HEAD response.
                request_method = session.head
            else:
                # Either this detect method needs the content associated
                # with the GET response, or this specific website will
                # not respond properly unless we request the whole page.
                request_method = session.get

            if site.check_type == "response_url":
                # Site forwards request to a different URL if username not
                # found.  Disallow the redirect so we can capture the
                # http status from the original URL request.
                allow_redirects = False
            else:
                # Allow whatever redirect that the site wants to do.
                # The final result of the request will be what is available.
                allow_redirects = True

            # TODO: cookies using
            # def parse_cookies(cookies_str):
            #     cookies = SimpleCookie()
            #     cookies.load(cookies_str)
            #     return {key: morsel.value for key, morsel in cookies.items()}
            #
            # if os.path.exists(cookies_file):
            #     cookies_obj = cookielib.MozillaCookieJar(cookies_file)
            #     cookies_obj.load(ignore_discard=True, ignore_expires=True)

            future = request_method(url=url_probe, headers=headers,
                                    allow_redirects=allow_redirects,
                                    timeout=timeout,
                                    )

            # Store future in data for access later
            # TODO: move to separate obj
            site.request_future = future

        # Add this site's results into final dictionary with all of the other results.
        results_total[site_name] = results_site

    # TODO: move into top-level function

    sem = asyncio.Semaphore(max_connections)

    tasks = []
    for sitename, result_obj in results_total.items():
        update_site_coro = update_site_dict_from_response(sitename, site_dict, result_obj, sem, logger, query_notify)
        future = asyncio.ensure_future(update_site_coro)
        tasks.append(future)

    if no_progressbar:
        await asyncio.gather(*tasks)
    else:
        for f in tqdm.asyncio.tqdm.as_completed(tasks):
            await f

    await session.close()

    # Notify caller that all queries are finished.
    query_notify.finish()

    return results_total
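The docstring above describes the returned data structure. A minimal sketch of consuming it, using only the keys it names:

# Assuming the coroutine above has been awaited:
#   results_total = await maigret(username, site_dict, query_notify, logger)
for site_name, site_result in results_total.items():
    status = site_result.get('status')      # QueryResult object describing the check
    url_user = site_result.get('url_user')  # profile URL, if the account may exist
    print(site_name, url_user, status)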
Example #30
async def fetch_urls(urls,
                     out_fname,
                     logging_fnames=None):
  tasks = []
  connector = aiohttp.TCPConnector(limit_per_host=1)
  async with aiohttp.ClientSession(
      connector=connector, cookie_jar=aiohttp.DummyCookieJar()) as session:
    # Async fetch the urls
    sem = asyncio.Semaphore(FLAGS.max_parallel_requests)
    for url in urls:
      side_data = {"url": url}
      task = asyncio.ensure_future(
          throttled_fetch_url(url, sem, session, side_data))
      tasks.append(task)
    tf.logging.info("Async requested %d urls", len(urls))

    # Setup output files
    file_handles = []
    out_f = make_tfrecord_writer(out_fname)
    file_handles.append(out_f)

    logging_fnames = logging_fnames or {}

    samples_f = None
    if "samples" in logging_fnames:
      samples_f = tf.gfile.Open(logging_fnames["samples"], "w")
      file_handles.append(samples_f)

    refs_written = [0]  # Made a list so can be mutated

    def text_extraction_callback(callback_arg):
      url, text = callback_arg
      written = write_ref_content(url, text, out_f)
      if not written:
        return
      if not refs_written[0] % FLAGS.log_every:
        timestamp = datetime.datetime.now().strftime("%H:%M")
        tf.logging.info("%s: Wrote ref %d in group", timestamp, refs_written[0])
        if samples_f is not None:
          samples_f.write(url)
          samples_f.write("\n")
          samples_f.write(text)
          samples_f.write("\n\n---\n\n")
      refs_written[0] += 1

    try:
      # Process each URL as it comes in.
      # Using a multiprocessing Pool because the text extraction is expensive
      # and so we distribute across cores.
      pool = multiprocessing.Pool()
      results = []
      for task in asyncio.as_completed(tasks):
        html, side_data = await task
        url = side_data["url"]
        if not html:
          continue
        res = pool.apply_async(mp_get_text, (url, html), {},
                               text_extraction_callback)
        results.append(res)
      for res in results:
        try:
          res.get(timeout=10)
        except multiprocessing.TimeoutError:
          pass
    finally:
      for f in file_handles:
        f.close()

    return refs_written[0]
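throttled_fetch_url is referenced but not defined in this example. A minimal sketch of what such a helper could look like, assuming it only bounds concurrency with the shared semaphore and returns the (html, side_data) pair the loop above expects:

import aiohttp

async def throttled_fetch_url(url, sem, session, side_data):
    # Bound the number of in-flight requests with the shared semaphore.
    async with sem:
        try:
            async with session.get(url) as resp:
                return await resp.text(), side_data
        except aiohttp.ClientError:
            # A falsy body makes the caller above skip this URL.
            return None, side_data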