Example #1
def crawl_by_keywords(keywords):
    connector = db.MongoConnector(config.DB_HOST, config.DB_USER_NAME,
                                  config.DB_PASSWORD, config.DB_NAME)
    backend = MongoQABackend(connector, config.QA_COLLECT_NAME)
    # NOTE: the original re-read keywords from args.kw_file here, which
    # silently shadowed the `keywords` parameter; the parameter is honored instead.
    keywords = util.expand_keywords(keywords, ['飲食'])
    kw_request = KeywordQueryRequest(util.get_browser_driver(config.DRIVER_PATH, config.ENV))
    asession = AsyncHTMLSession()
    for keyword in keywords:
        start_url = 'http://so.120ask.com/?kw=%s' % keyword
        current_url = start_url
        while True:
            page_src = kw_request.send(current_url)
            if page_src is None:
                break
            page = KeywordQueryPage(page_src)
            links = page.parse_question_links()
            qids = page.parse_question_ids()
            pending = []
            for qid, link in zip(qids, links):
                cb = AsyncHealthPageCallback(qid, backend)
                pending.append(AsyncHealthQuestionRequest(asession, link, cb))
            if pending:
                asession.run(*[r.send for r in pending])

            next_link = page.parse_next_page_link()
            if next_link is None:
                break
            current_url = urljoin(start_url, next_link)
Example #2
async def logic(urls):
    try:
        # Signal handlers can only be registered in the main thread, so
        # pyppeteer's are disabled and a dedicated event loop is installed.
        new_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(new_loop)
        session = AsyncHTMLSession()
        browser = await launch({
            'ignoreHTTPSErrors': True,
            'headless': True,
            'handleSIGINT': False,
            'handleSIGTERM': False,
            'handleSIGHUP': False,
            'args': ['--no-sandbox', '--disable-setuid-sandbox']
        })
        session._browser = browser

        emails1 = []
        for url in urls.split(','):
            emails = await fetch(url, session)
            emails1.extend(emails)

        # The addresses arrive in consecutive duplicate pairs; keep every
        # second one. (The original popped from the list while iterating
        # over its indices, which can raise IndexError.)
        emails1 = emails1[::2]

        returndict = {}  # was referenced without ever being defined
        for i, email in enumerate(emails1):
            returndict['email' + str(i + 1)] = email

        return returndict
    except Exception as e:
        print(e)
        return {}  # was an empty list, inconsistent with the dict above
Example #3
def find_department_of_keywords(keywords, filepath):
    kw_request = KeywordQueryRequest(
        util.get_browser_driver(config.DRIVER_PATH, config.ENV))
    asession = AsyncHTMLSession()
    # Use a context manager so the output file is always closed.
    with open(filepath, 'w', encoding='utf-8') as f:
        for keyword in keywords:
            start_url = 'http://so.120ask.com/?kw=%s' % keyword

            page_src = kw_request.send(start_url)
            assert page_src is not None
            page = KeywordQueryPage(page_src)
            links = page.parse_question_links()
            qids = page.parse_question_ids()
            pending = []
            for qid, link in zip(qids, links):
                cb = DepartmentOfKeywordCallback()
                pending.append(AsyncHealthQuestionRequest(asession, link, cb))
            assert len(pending) > 3
            res = asession.run(*[r.send for r in pending])
            # The department that occurs most often among the fetched
            # questions is taken to be the keyword's department.
            c = Counter(res)
            department, url = c.most_common()[0][0]
            f.write('%s-->%s,%s\n' % (keyword, department, url))
Example #4
    async def get_async_response(self):
        """
        Define an async GET so the response's JavaScript can be executed later.
        """
        asession = AsyncHTMLSession()
        #  asession = self.set_session(asession)
        aresponse = asession.get(self.url.geturl())
        return await aresponse
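Note that nothing in Example #4 actually executes JavaScript; a hypothetical variant (not from the source) would additionally await requests-html's `arender()` on the response:

    async def get_async_response_rendered(self):
        # Hypothetical variant: arender() drives pyppeteer's headless
        # Chromium (downloaded on first use) to execute the page's JS.
        asession = AsyncHTMLSession()
        aresponse = await asession.get(self.url.geturl())
        await aresponse.html.arender()
        return aresponse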
Example #5
def async_get(event_loop):
    """ The AsyncHTMLSession cannot be created globally, since that would
        create a different event loop from the one pytest-asyncio uses. """
    async_session = AsyncHTMLSession()
    async_session.mount('file://', FileAdapter())
    path = os.path.sep.join((os.path.dirname(os.path.abspath(__file__)), 'python.html'))
    url = 'file://{}'.format(path)

    return partial(async_session.get, url)
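A minimal usage sketch for the helper above (hypothetical test, not from the source; it assumes `async_get` is registered as a pytest fixture and pytest-asyncio is installed):

import pytest

@pytest.mark.asyncio
async def test_async_get(async_get):
    # The fixture returns a partial; awaiting it performs the file:// GET.
    r = await async_get()
    assert r.status_code == 200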
Example #7
    def spider_opened(self, spider: Spider) -> None:
        """Open an AsyncHTMLSession when the spider starts."""
        try:
            self.session = AsyncHTMLSession(**self.settings)
        except TypeError:
            self.session = AsyncHTMLSession()
            raise AttributeError(
                "DEFAULT_SCRAPY_REQUESTS_SETTINGS is not "
                "aligned with requests-html session settings.\n"
                "Please check www.github.com/psf/requests-html/blob/026c4e5217cfc8347614148aab331d81402f596b/requests_html.py#L759"
            )
Example #8
async def search_rootme_user_challenges(username: str):
    url = f"https://www.root-me.org/{username}?inc=score"

    session = AsyncHTMLSession()

    async def get_profile():

        r = await session.get(url)
        data = {}

        data['score'] = r.html.xpath(
            "/html/body/div[1]/div/div[2]/main/div/div/div/div/div[2]/div[1]/div[1]/span/text()"
        )[0].split("\xa0")[0][1:]
        data['ranking'] = r.html.xpath(
            "/html/body/div[1]/div/div[2]/main/div/div/div/div/div[2]/div[1]/div[2]/span"
        )[0].text
        data['rank'] = r.html.xpath(
            "/html/body/div[1]/div/div[2]/main/div/div/div/div/div[2]/div[1]/div[3]/span"
        )[0].text

        categories_list = r.html.xpath(
            "/html/body/div/div/div[2]/main/div/div/div/div/div[2]")[0].find(
                "div")

        categories = {}

        for x in categories_list:
            category = x.find('div')[0]
            try:
                title = category.find('h4')[0].text.split('\n')[1]
                categories[title] = {
                    "percentage": category.find('h4')[0].text.split('\n')[0]
                }
                points, _, completion = category.find("span")[1].text.split(
                    '\xa0')
                categories[title]['points'] = points
                categories[title]['completion'] = completion
                categories[title]['challenges'] = {}
                challenges = category.find("ul")[0].find('li')
                for challenge in challenges:
                    categories[title]['challenges'][challenge.text[2:]] = {
                        'completed': challenge.text[0] == 'o'
                    }
                    categories[title]['challenges'][
                        challenge.text[2:]]['points'] = challenge.find(
                            'a')[0].attrs['title'].split(' ')[0]
            except Exception:
                pass  # skip blocks that do not match the expected markup
        data['challenges'] = categories
        return data

    # The original called session.run(get_profile) here, but run() uses
    # loop.run_until_complete(), which raises inside an already-running
    # event loop; awaiting the coroutine directly is safe.
    return await get_profile()
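A hedged driver sketch for the coroutine above (the entry point and username are assumptions, not from the source):

import asyncio

if __name__ == '__main__':
    data = asyncio.run(search_rootme_user_challenges('some_username'))
    print(data['score'], data['ranking'], data['rank'])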
Example #9
async def main():
    if not os.path.exists('./img'):
        os.mkdir('img')

    # sneaker_links_parser = Sneaker_Links("https://sneakerlinks.com", "test")
    # sneaker_links_parser.get_data()

    asession = AsyncHTMLSession()

    solelinks_parser = Solelinks("https://vagu.space", "solelinks", asession)
    # Session.run() calls loop.run_until_complete(), which raises inside a
    # running event loop; await the coroutine directly instead.
    await solelinks_parser.get_page()
Example #10
    def __init__(self, session: AsyncHTMLSession = None, config={"EKSI_URL": "https://eksisozluk.com/"}):
        """
        Initializes the class.
        """

        # Fall back to a fresh session unless a real AsyncHTMLSession is given.
        if isinstance(session, AsyncHTMLSession):
            self.session = session
        else:
            self.session = AsyncHTMLSession()
        self.config = config
        self.eksi = self.config["EKSI_URL"]
Example #11
    def __init__(self, gitlabAddress: str) -> None:
        self._orunSession = AsyncHTMLSession(workers=20)
        self._gitlabAddress = gitlabAddress

        # do not stop the loop on self.getAllProjectMetadata()
        self._dontStopLoop = True
        self._pagesCount = 0

        # raw data
        self.projectsMetadata = list()
        self.projectCommitsMetadata = list()

        # **start** Orunmila knows
        self._commitsByYear = dict()
        self._numberOfProjects = 0
Example #12
async def main():

    pictures_queue = Queue()
    workers_count = 300
    connection = {
        'user': '******',  # input your postgres username
        'database': 'your database name',  # input your database name here
        'host': '127.0.0.1',  # change your host if it's not local
        'password': '******'  # input your password for this database
    }
    dsn = 'postgresql://{user}:{password}@{host}/{database}'.format(**connection)

    engine = create_engine(dsn)
    result = engine.execute('''select picture from "your_table_name"''')
    res_list = []
    for row in result:
        clean_jpg = row['picture'].split("\n")
        for i in clean_jpg:
            res_list.append(i)
    print(len(res_list))

    for pic in res_list:
        pictures_queue.put_nowait(pic)

    session = AsyncHTMLSession()

    tasks = []
    for num in range(workers_count):
        task = worker(pictures_queue, num, session)
        tasks.append(task)
    await asyncio.gather(*tasks)
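`worker` is not shown in Example #12; a minimal sketch consistent with the call site (queue of picture URLs, worker number, shared session), with hypothetical internals:

async def worker(pictures_queue, num, session):
    # Hypothetical worker: drain the queue and fetch each picture with
    # the shared session, stopping once the queue is empty.
    while not pictures_queue.empty():
        pic = pictures_queue.get_nowait()
        try:
            r = await session.get(pic)
            # ... write r.content into ./img here ...
        except Exception as e:
            print(f'worker {num} failed on {pic}: {e}')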
Example #13
async def moz_parser(urls_q):
    session = AsyncHTMLSession()

    while urls_q.qsize() > 0:
        url = await urls_q.get()
        try:
            response = await session.get(url)

            # time.sleep() would block the whole event loop; yield instead.
            await asyncio.sleep(5)

        except Exception as e:
            print(type(e), e)
            await urls_q.put(url)
            continue

        links = response.html.xpath('//h2/a')

        for link in links:

            href = link.attrs['href']
            name = link.text

            with open('results.txt', 'a', encoding='utf-8') as f:
                f.write(f'{href}\t{name}\n')

        print(f'SUCCESS | {url}')
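A hypothetical driver for `moz_parser` (not in the source), showing how the shared queue lets several workers run concurrently:

async def run_moz(urls):
    # Fill an asyncio.Queue, then let a handful of parser workers drain it.
    q = asyncio.Queue()
    for u in urls:
        q.put_nowait(u)
    await asyncio.gather(*(moz_parser(q) for _ in range(5)))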
Example #14
def main():
    session = AsyncHTMLSession()
    working_list = set(load(file))
    print(f'Len of working list {len(working_list)}')
    tasks = [
        ping_creator(session, url) for url in working_list if filtered_url(url)
    ]
    # The original stopped here without running the coroutines; one way
    # to drive them on the session's own loop:
    session.loop.run_until_complete(asyncio.gather(*tasks))
Example #15
async def kuaidaili():
    res = list()

    sess = AsyncHTMLSession()
    # The two free-proxy listings share identical markup, so scrape them
    # in one loop instead of two copy-pasted blocks.
    urls = ['https://www.kuaidaili.com/free/inha/',
            'https://www.kuaidaili.com/free/intr/']
    for i, u in enumerate(urls):
        if i:
            await asyncio.sleep(5)  # be polite between the two requests
        resp = await sess.get(u)
        for ip_row in resp.html.find('#list table tr'):
            ip = ip_row.find('td[data-title="IP"]', first=True)
            port = ip_row.find('td[data-title="PORT"]', first=True)
            if ip and port:
                res.append(Proxy(
                    ip_port=f"{ip.text}:{port.text}",
                    scheme=SCHEME_HTTP,
                    status=STATUS_NEW,
                ))
    await sess.close()

    return res
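`Proxy`, `SCHEME_HTTP`, and `STATUS_NEW` come from the surrounding project (they reappear in Examples #16 and #28); hypothetical stand-ins for running these snippets on their own:

from dataclasses import dataclass

SCHEME_HTTP = 'http'
STATUS_NEW = 'new'

@dataclass
class Proxy:
    ip_port: str
    scheme: str
    status: str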
Example #16
async def http_proxy():
    res = list()

    sess = AsyncHTMLSession()
    for u in [
        'https://proxyhttp.net/free-list/proxy-anonymous-hide-ip-address/',
        'https://proxyhttp.net/',
        'https://proxyhttp.net/free-list/anonymous-server-hide-ip-address/2#proxylist',
    ]:
        resp = await sess.get(u)
        await resp.html.arender(wait=1.5, timeout=10.0)
        for ip_row in resp.html.find('table.proxytbl tr'):
            ip = ip_row.find('td:nth-child(1)', first=True)
            port = ip_row.find('td:nth-child(2)', first=True)
            try:
                if ip and port:
                    port_str = re.search(r'//]]> (\d+)', port.text).group(1)
                    res.append(Proxy(
                        ip_port=f"{ip.text}:{port_str}",
                        scheme=SCHEME_HTTP,
                        status=STATUS_NEW,
                    ))
            except AttributeError:
                pass
    await sess.close()

    return res
Example #17
    async def __get(url: str, proxy: str = None) -> Response:
        session = AsyncHTMLSession()
        if proxy is not None:
            result = await session.get(url, proxies=Scrapper.__proxies(proxy),
                                       timeout=Scrapper.PROXY_TIMEOUT)
        else:
            result = await session.get(url, timeout=Scrapper.GET_TIMEOUT)
        await session.close()
        return result
Example #18
async def l_wine(message):
    search_string = message.text
    session = AsyncHTMLSession()
    link = 'https://l-wine.ru/collection/?'
    headers = {
        'authority': 'l-wine.ru',
        'cache-control': 'no-store, no-cache, must-revalidate',
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36',
        'sec-fetch-dest': 'empty',
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'origin': 'https://l-wine.ru',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
    }
    params = {
        'q': search_string,
        'PAGEN_1': '2',
        'bxajaxid': '556e3da235ad5f10fd6e4d11c79000cf',
        'parent_bxajaxid': '5a96feaafb623ec95f9c643be02cc78f'
    }

    r = await session.get(link, headers=headers, params=params)
    #await r.html.arender(timeout=20)
    print(r.text)
Example #19
async def get_stock():
    bestbuy_base_url = "https://www.bestbuy.com/site/computer-cards-components/video-graphics-cards/abcat0507002.c?id=abcat0507002"
    bestbuy_model_stub = Template(
        "qp=gpusv_facet%3DGraphics%20Processing%20Unit%20(GPU)~NVIDIA%20GeForce%20RTX%20$Model"
    )

    # Get the current time and append to the end of the url just to add some minor difference
    # between scrapes.
    t = int(round(time.time() * 1000))

    urls = {
        f"3070-={bestbuy_base_url}&{bestbuy_model_stub.substitute(Model='3070')}&t={t}",
        f"3070-=https://www.newegg.com/p/pl?N=100007709%20601357250&PageSize=96&t={t}",
        f"3080-={bestbuy_base_url}&{bestbuy_model_stub.substitute(Model='3080')}&t={t}",
        f"3080-=https://www.newegg.com/p/pl?N=100007709%20601357247&PageSize=96&t={t}",
        f"3090-={bestbuy_base_url}&{bestbuy_model_stub.substitute(Model='3090')}&t={t}",
        f"3090-=https://www.newegg.com/p/pl?N=100007709%20601357248&PageSize=96&t={t}"
    }
    s = AsyncHTMLSession()

    tasks = (parse_url(s,
                       url.split("-=")[1],
                       url.split("-=")[0]) for url in urls)

    return await asyncio.gather(*tasks)
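`parse_url` is not shown; a minimal hypothetical sketch consistent with the call site (shared session, URL, model label):

async def parse_url(session, url, model):
    # Hypothetical helper: fetch the listing page and return the model
    # label together with the raw HTML for downstream stock checks.
    r = await session.get(url)
    return model, r.html.html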
Example #20
async def main2():

    urlList = await main()
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
    }

    # Reuse one session instead of creating a new one per link.
    asession = AsyncHTMLSession()

    for link in urlList:
        r = await asession.get(link, headers=headers)

        await r.html.arender()

        # find() returns a list of elements (never -1); an empty list
        # means the page has no video wrapper.
        if not r.html.find('.video-wrap'):
            continue
        h1 = r.html.find('.disable-download')

        newItem = str(h1)
        start = newItem.find("src='") + len("src='")
        end = newItem.find("' style='background")

        link = newItem[start:end]
        print(link)

        webbrowser.open(link, new=2)
        # time.sleep() would stall the event loop; yield instead.
        await asyncio.sleep(8)
Example #21
    async def qfdocs(self, ctx, arg):
        query_url = f'https://qiskit.org/documentation/search.html?q={arg}&check_keywords=yes&area=default#'
        try:
            session = AsyncHTMLSession()
            response = await session.get(query_url)
        except Exception:
            return await ctx.send('`Failed to Establish Connection.`')
        else:
            # Render the page so the client-side search results exist.
            await response.html.arender(sleep=7)
            soup = BeautifulSoup(response.html.html, "html.parser")
            summary = soup.select('.search')

        for li in summary[0].find_all('li')[0:10]:
            link = li.find('a', href=True)
            self.res.append(
                f'[`{link.contents[0]}`]({self.render_link + link["href"]})')

        if not self.res:
            self.title = '`No Results Found`'
        else:
            self.title = f'`Results for: {arg}`'

        embed = discord.Embed(title=self.title,
                              description='\n'.join(self.res),
                              color=0xe8e3e3)

        return await ctx.send(embed=embed)
Example #22
    async def fetch(self, link) -> Optional[Tuple]:
        session = AsyncHTMLSession()
        try:
            r = await session.get(link)
            container = r.html.find(".zn-body-text", first=True)
            contents: List[Element] = container.find()
            html = ""
            cover = None
            for content in contents:
                element_class = content.attrs.get('class')
                if element_class:
                    if "zn-body__paragraph" in element_class:
                        html += content.html

                    if "el__embedded" in element_class:
                        img = content.find('img', first=True)
                        if img:
                            caption = img.attrs.get('alt')
                            src = img.attrs.get('data-src-large')
                            src = f"https:{src}"
                            if not cover:
                                cover = src
                            html += f'<img src="{src}" />\n'
                            html += f"<span>{caption}</span>\n"
            self.parser.parse(html)
            return self.parser.convert(), str(self.parser), cover
        except Exception as e:
            print(e)
            return None, None, None
Example #23
async def get_character_name(gear_url, message):
    """
    It is *sometimes* the case that discord users don't update their username 
    to be their character name (eg for alts).

    This method renders the gear_url in an HTML session and parses the page
    to attempt to find the character's name.

    This assumes a specific format of the page: player names are nested in
    an h3 element with css class named 'class-[player class]'

    Returns the character's name if successful, otherwise returns the message sender's
    display name in discord.
    """
    name = message.author.display_name
    if not re.match(SIXTY_UPGRADES_REGEX, gear_url):
        return name

    for i in range(MAX_FETCH_CHARACTER_NAME_RETRIES):
        try:
            asession = AsyncHTMLSession()
            webpage = await asession.get(gear_url)
            await webpage.html.arender()
            query_selector = "h3[class^='class-']"
            name = webpage.html.find(query_selector, first=True).text
            break
        except Exception as e:
            logging.error(e)
        finally:
            await asession.close()
    return name
Example #24
    async def getErrorQuestion(self, urlAt):
        print("start error session " + urlAt[0] + " " + urlAt[1])
        errorSession = AsyncHTMLSession()
        repeat = True
        while repeat:
            try:
                print("connecting: " + urlAt[0])
                response = await errorSession.get(urlAt[0])
            except Exception:
                pass  # keep retrying until the GET succeeds
            else:
                repeat = False
        print("rendering: " + urlAt[0])
        try:
            rendered = await self.renderQuestion(response)
        except Exception:
            print("error was on " + urlAt[0])
            # The original appended the undefined name `url` here.
            self.errors.append(urlAt)
            response.close()
        else:
            del self.errors[self.errors.index(urlAt)]
            response.close()
            print("correctly rendered: " + urlAt[0])
            self.responses.append([rendered, urlAt[1]])
        await errorSession.close()
Example #25
    async def fetch(self, link: str) -> Optional[Tuple]:
        try:
            session = AsyncHTMLSession()
            r = await session.get(link)
            content = r.html.find(".Mid2L_con", first=True)
            content_list = content.find()

            texts = []
            images = []
            content = ""
            cover = None

            for c in content_list:
                image = c.find("img", first=True)
                if image:
                    if image.attrs['src'] not in images:
                        content += image.html
                        images.append(image.attrs['src'])
                        if not cover:
                            cover = image.attrs['src']
                else:
                    if c.text not in texts:
                        content += c.html
                        texts.append(c.text)
            self.parser.parse(content)
            return self.parser.convert(), str(self.parser), cover
        except Exception:
            return None, None, None
Example #26
async def get_stock():
    bestbuy_base_url = "https://www.bestbuy.com/site/computer-cards-components/video-graphics-cards/abcat0507002.c?id=abcat0507002"
    bestbuy_model_stub = Template("qp=gpusv_facet%3DGraphics%20Processing%20Unit%20(GPU)~NVIDIA%20GeForce%20RTX%20$Model")

    # Get the current time and append to the end of the url just to add some minor difference
    # between scrapes.
    t = int(round(time.time() * 1000))

    urls = {
        # f"3070-={bestbuy_base_url}&{bestbuy_model_stub.substitute(Model='3070')}&t={t}",
        # f"3070-=https://www.newegg.com/p/pl?N=100007709%20601357250&PageSize=96&t={t}",
        f"3080-={bestbuy_base_url}&{bestbuy_model_stub.substitute(Model='3080')}&t={t}",
        f"3080-=https://www.newegg.com/p/pl?N=100007709%20601357247&PageSize=96&t={t}",
        f"3090-={bestbuy_base_url}&{bestbuy_model_stub.substitute(Model='3090')}&t={t}",
        f"3090-=https://www.amazon.com/s?k=rtx+3090&i=computers&rh=n%3A17923671011%2Cn%3A284822%2Cp_n_availability%3A1248801011&dc&qid=1605664070&rnid=1248799011&t=%7Bt%7D&t={t}"
        f"3090-=https://www.newegg.com/p/pl?N=100007709%20601357248&PageSize=96&t={t}",
        f"5900X-=https://www.bestbuy.com/site/promo/amd-ryzen-5000?qp=numberofcores_facet%3DNumber%20of%20Cores~12-core&t={t}",
        f"5900X-=https://www.newegg.com/p/pl?N=100007671%20601359154%20601301117&t={t}",
        f"5900X-=https://www.amazon.com/s?k=5900x&i=computers&rh=n%3A229189%2Cp_n_availability%3A1248801011&dc&qid=1605664558&rnid=1248799011&t={t}",
    }
    s = AsyncHTMLSession()

    tasks = (parse_url(s, url.split("-=")[1], url.split("-=")[0]) for url in urls)

    return await asyncio.gather(*tasks)
Example #27
def test_async_run():
    asession = AsyncHTMLSession()

    async def test1():
        return await asession.get('https://xkcd.com/1957/')

    async def test2():
        return await asession.get('https://reddit.com/')

    async def test3():
        return await asession.get('https://smile.amazon.com/')

    r = asession.run(test1, test2, test3)

    assert len(r) == 3
    assert isinstance(r[0], HTMLResponse)
Example #28
async def proxynova():
    res = list()

    sess = AsyncHTMLSession()
    resp = await sess.get('https://www.proxynova.com/proxy-server-list/')
    for tr in resp.html.find('#tbl_proxy_list > tbody:nth-child(2) > tr'):
        if 'data-proxy-id' not in tr.attrs:
            continue

        script_element = tr.find('td:nth-child(1) > abbr > script', first=True)
        port_element = tr.find('td:nth-child(2)', first=True)
        if not script_element or not port_element:
            continue

        groups = re.findall(
            r"document\.write\('(.*?)'\);",
            script_element.text)
        if not groups or len(groups) != 1:
            continue
        ip = groups[0]
        port = port_element.text
        res.append(Proxy(
            ip_port=f"{ip}:{port}",
            scheme=SCHEME_HTTP,
            status=STATUS_NEW,
        ))
    await sess.close()

    return res
Example #29
async def getRanks(platform, profile):
    asession = AsyncHTMLSession()
    url = ("https://rocketleague.tracker.network/profile/"
           + platform.strip() + "/" + profile)
    r = await asession.get(url)
    await r.html.arender(retries=8)

    ranks = {}

    #parsing stuff below
    parse = r.html.text.split("(Top")
    highest = ["", 0]
    for i in range(1, len(parse) - 2):
        if ("Ranked" in parse[i] and "-" not in parse[i]):
            if ("Grand Champion Division" in parse[i]
                    or "Unranked Division" in parse[i]
                    or "I Division I" in parse[i]):
                gameMode = parse[i].split("v")[0].split("\n")[-1][:-1].strip()
                rank = parse[i].split(gameMode)[1].split("\n")[0][5:].strip()
                mmr = int(parse[i].split("\n")[-2].strip().replace(",", ""))
                ranks[gameMode] = {"Rank": "", "MMR": 0}
                ranks[gameMode]["Rank"] = rank
                ranks[gameMode]["MMR"] = mmr

                if (mmr > highest[1]):
                    highest = [rank, mmr]

    r.close()
    return ranks, highest
Example #30
async def get_data_asynchronous():
    urls = [
        'http://www.fpb.pt/fpb2014/!site.go?s=1&show=jog&id=258215'
    ]

    with ThreadPoolExecutor(max_workers=20) as executor:
        with AsyncHTMLSession() as session:
            # Set any session parameters here before calling `fetch`

            # Grab the running event loop
            loop = asyncio.get_event_loop()

            # Schedule one executor-backed `fetch` call per URL. (The
            # original awaited inside the comprehension, which ran the
            # fetches sequentially and then passed plain results, not
            # awaitables, to asyncio.gather.)
            tasks = [
                loop.run_in_executor(
                    executor,
                    fetch,
                    *(session, url)  # pass multiple arguments to `fetch`
                )
                for url in urls
            ]

            # Await all fetches and parse each response
            for response in await asyncio.gather(*tasks):
                parseWebpage(response)
Example #31
    async def get_site(self):
        # Signal handlers can only be registered in the main thread, so
        # pyppeteer's are disabled and a dedicated event loop is installed.
        new_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(new_loop)
        session = AsyncHTMLSession()
        browser = await pyppeteer.launch({
            'ignoreHTTPSErrors': True,
            'headless': True,
            'handleSIGINT': False,
            'handleSIGTERM': False,
            'handleSIGHUP': False
        })
        session._browser = browser
        url = 'https://money.tmx.com/en/quote/' + self.symbol
        resp_page = await session.get(url)
        await resp_page.html.arender()
        return resp_page