Example #1
async def req():
    # Request the redirect page first
    resp = await requests.get(MAIN_PAGE_URL)
    resp_text = await resp.text()
    # Extract the image URLs and fetch them concurrently
    image_urls = [f"{HOST}{image.get('src')}" for image in etree.HTML(resp_text).xpath('//img')]
    await asyncio.gather(*[requests.get(image_url) for image_url in image_urls])

    # Fetch the content page
    resp1 = await requests.get(MAIN_PAGE_URL)
    resp1_text = await resp1.text()
    # print(resp1_text)
    doc = etree.HTML(resp1_text)

    # Call the JS to generate the CSS
    # os.path.dirname(__file__) resolves the directory of the current .py file
    js = execjs.compile(open(f"{os.path.dirname(__file__)}/js/exam1.js", encoding="utf-8").read())
    css = base64.b64decode(js.call("get_css", resp1_text)).decode()
    print(css)

    # Parse the CSS and overwrite each span tag's text with the mapped value
    css_dict = css2dict(css)
    spans = doc.xpath('//span')
    for span in spans:
        span.text = css_dict.get(span.get("class"))

    # Remove the p and script tags; source: https://stackoverflow.com/questions/7981840/how-to-remove-an-element-in-lxml
    for bad in doc.xpath("//body/p|//body/script"):
        bad.getparent().remove(bad)

    # Use XPath to pull all text nodes under body, strip leading/trailing whitespace and newlines, and join them into one string
    exam_text = "".join([text.strip() for text in doc.xpath('//body//text()')])
    print(exam_text)
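req() leans on names defined elsewhere in the original project: MAIN_PAGE_URL, HOST, and css2dict are not part of aiohttp_requests. A minimal scaffolding sketch, with hypothetical placeholder values, that would let it run:

import asyncio
import base64
import os

import execjs
from aiohttp_requests import requests
from lxml import etree

HOST = "http://example.com"      # hypothetical host standing in for the real site
MAIN_PAGE_URL = f"{HOST}/exam"   # hypothetical entry URL

def css2dict(css):
    # Hypothetical helper: map each CSS class name to the text it renders.
    # See the sketch after Example #4 for one possible implementation.
    ...

asyncio.get_event_loop().run_until_complete(req())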
Example #2
def get_video(source):
    cheemses = ["https://www.youtube.com/channel/UChZWowQd_y6usuF7vSL4jmA"]
    channels = [
        "https://www.youtube.com/channel/UCYd6CmhFvvq6yruUBmGXjuA/videos",
        "https://www.youtube.com/channel/UCX2laRqGQhqoChYmlaUgOiw/videos",
        "https://www.youtube.com/user/wettitab/videos",
        "https://www.youtube.com/channel/UC38r7_x7oMPAZweB2fvGDXQ/videos",
        "https://www.youtube.com/channel/UC-xjitW_J39_Q1ure2HlJew/videos",
        "https://www.youtube.com/channel/UCHh-cQr-viOcimjPhxr3xRQ/videos",
        "https://www.youtube.com/channel/UCAJI1a4L0R5HkvTHTxZOd6g/videos",
        "https://www.youtube.com/user/shibainusaki/videos",
        "https://www.youtube.com/channel/UCOE2s_EwBM0es4TfC6ce7Fg/videos",
        "https://www.youtube.com/channel/UCkEdaRw8w0daEvGgzKff8TA",
        "https://www.youtube.com/channel/UC_WUkVnPROmHC1qnGHQAMDA",
        "https://www.youtube.com/channel/UChZWowQd_y6usuF7vSL4jmA",
    ]

    sources = {"shibes": channels, "cheems": cheemses}
    all_vids = []
    for i in sources[source]:
        url = i
        page = requests.get(url).content
        data = str(page).split(" ")
        item = 'href="/watch?'
        vids = [
            line.replace('href="', "youtube.com") for line in data
            if item in line
        ]  # list of all videos listed twice
        all_vids.extend(vids)
    return random.choice(all_vids)
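get_video splits the raw channel HTML on spaces and keeps the lines containing watch links, so it is tied to YouTube's old page markup, and it needs `requests` and `random` imported at module level. A quick usage sketch:

import random
import requests

print(get_video("shibes"))  # a random video URL from one of the shibe channels
print(get_video("cheems"))  # a random video URL from the cheems channel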
Example #3
async def request():
    # Using plain aiohttp client
    with show_duration('aiohttp', extra_newline=True):
        async with aiohttp.ClientSession() as session:
            async with session.get('https://www.google.com') as response:
                content = await response.text()
                print('Status:', response.status)  # 200
                print('Length:', len(content))  # 10597

    # The above becomes a bit easier without indents using `aiohttp_requests`
    with show_duration('aiohttp_requests', extra_newline=True):
        response = await requests.get('https://www.google.com')
        content = await response.text()
        print('Status: ', response.status)  # 200
        print('Length: ', len(content))  # 10625

    # Now, let's do some concurrent requests
    with show_duration('100 concurrent requests'):
        status_count = defaultdict(int)
        get_futures = [
            requests.get('https://www.google.com') for _ in range(100)
        ]
        for get_future in asyncio.as_completed(get_futures):
            response = await get_future
            status_count[response.status] += 1

        print(status_count)  # {200: 100}
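show_duration is not part of aiohttp or aiohttp_requests; the example assumes a timing helper from the surrounding module. A minimal sketch of one way to write it:

import time
from contextlib import contextmanager

@contextmanager
def show_duration(label, extra_newline=False):
    # Time the wrapped block and print the elapsed seconds.
    start = time.monotonic()
    try:
        yield
    finally:
        print(f'{label}: {time.monotonic() - start:.2f}s')
        if extra_newline:
            print()

Note that asyncio.as_completed yields the request futures in completion order, so each status is counted as soon as its response arrives rather than in submission order.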
Example #4
async def req():
    resp = await requests.get(MAIN_PAGE_URL)
    resp_text = await resp.text()
    print(resp_text)
    image_urls = [
        f"{HOST}{image.get('src')}"
        for image in etree.HTML(resp_text).xpath('//img')
    ]
    await asyncio.gather(
        *[requests.get(image_url) for image_url in image_urls])

    resp1 = await requests.get(MAIN_PAGE_URL)
    resp1_text = await resp1.text()
    print('###############################')
    print(resp1_text)
    doc = etree.HTML(resp1_text)

    js = execjs.compile(
        open(f"{os.path.dirname(__file__)}/js/exam1.js",
             encoding="utf-8").read())
    css = base64.b64decode(js.call('get_css', resp1_text)).decode()
    print('###############################')
    print(css)

    css_dict = css2dict(css)
    spans = doc.xpath('//span')
    for span in spans:
        span.text = css_dict.get(span.get("class"))

    for bad in doc.xpath("//body/p|//body/script"):
        bad.getparent().remove(bad)

    exam_text = "".join([text.strip() for text in doc.xpath('//body//text()')])
    print('###############################')
    print(exam_text)
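Example #4 repeats the flow of Example #1 and relies on the same unshown css2dict helper. Assuming the target site emits rules shaped like `.cls { content: "X" }` (an assumption about that site, not anything shown above), one possible sketch:

import re

def css2dict(css):
    # Hypothetical parser: turn '.cls { content: "X" }' rules into {'cls': 'X'}.
    return {
        m.group(1): m.group(2)
        for m in re.finditer(r'\.([\w-]+)\s*\{\s*content:\s*"([^"]*)"', css)
    }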
Example #5
async def watch_current():
    sleep_amount = 3

    while True:
        prev = chain[-1] if chain else None
        current = prev['to'] if prev else initial

        print('check ' + current)
        try:
            response = await asyncio.wait_for(
                requests.get(
                    f"https://api.zksync.io/api/v0.1/account/{current}/history/0/15"
                ),
                timeout=15.0,
            )
        except asyncio.TimeoutError:
            print('timeout!')
            await asyncio.sleep(2)
            continue

        data = await response.json()

        for tx in data:
            #print(json.dumps(tx, indent=4))
            created_at = calendar.timegm(parse(tx['created_at']).timetuple())
            if (tx['tx']['type'] == 'Transfer'
                    and tx['tx']['from'] == current
                    and tx['tx']['token'] == 'TBTC'
                    and (not prev or created_at > prev['timestamp'])):
                print("found tx -> " + tx['tx']["to"])
                chain.append({
                    "from": tx['tx']["from"],
                    "to": tx['tx']["to"],
                    "amount": tx['tx']["amount"],
                    "fee": tx['tx']["fee"],
                    "tx_id": tx['tx_id'],
                    "date": tx['created_at'],
                    "timestamp": created_at,
                })
                sleep_amount = 3
                break
        else:
            # for/else branch: no matching transfer in this page, so poll less often
            sleep_amount = 60

        await asyncio.sleep(sleep_amount)
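watch_current follows a chain of zkSync TBTC transfers by polling the public history endpoint; chain, initial, and parse come from the surrounding module. A minimal scaffolding sketch (the starting address is a hypothetical placeholder):

import asyncio
import calendar

from aiohttp_requests import requests
from dateutil.parser import parse

chain = []    # transfers discovered so far
initial = "0x0000000000000000000000000000000000000000"  # hypothetical start address

asyncio.get_event_loop().run_until_complete(watch_current())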
Example #6
import asyncio
from aiohttp_requests import requests
# NOTE: the next import shadows the aiohttp_requests name above, so the call
# at the bottom of this example uses the synchronous requests library.
import requests

'''url = 'https://api.pushshift.io/reddit/comment/search/'
async def get_com(url, query):
    resp = await requests.get(url, params={'q': query})
    data = await resp.json()
    return data'''

'''url = 'https://api.pushshift.io/reddit/comment/search/'
def get_com(url, query):
    resp = requests.get(url, params={'q': query})
    data = resp.json()
    return data
print(get_com(url, "author"))'''

respon = requests.get('https://api.pushshift.io/reddit/comment/search/')
print(respon)
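Because of the shadowed import, only the synchronous path runs here, and print(respon) shows a Response object rather than the payload. To exercise the aiohttp_requests version instead, the coroutine has to be driven by an event loop; a minimal sketch with the import left un-shadowed:

import asyncio
from aiohttp_requests import requests

async def get_com(url, query):
    resp = await requests.get(url, params={'q': query})
    return await resp.json()

url = 'https://api.pushshift.io/reddit/comment/search/'
data = asyncio.get_event_loop().run_until_complete(get_com(url, 'author'))
print(data)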