Beispiel #1
0
def get_tasklist(url):
    """Build the list of paginated category URLs to crawl.

    Requests ``url``, collects the links that match ``'/cp'`` through
    ``lt.links``, and expands every such link into pages 1 through 100
    under the module-level ``domain``.

    Returns:
        A list of URL strings, grouped by category with ascending page
        numbers inside each group.
    """
    response = lt.send_request(url)
    tasks = []
    for category in lt.links(response, search='/cp'):
        # Drop the link's first character (presumably a leading '/' -- verify).
        suffix = category[1:]
        for page in range(1, 101):
            tasks.append(f'{domain}/pg{page}-{suffix}')
    return tasks
Beispiel #2
0
def test_links():
    """Check the list returned by ``lt.links`` for the ``domain`` page.

    The unfiltered result must be a list without ``'#'`` or empty entries
    and without duplicates; the ``search='wiki'`` result must only contain
    links that include ``'wiki'``.
    """
    res = lt.send_request(domain)
    all_links = lt.links(res)
    wiki_links = lt.links(res, search='wiki')
    assert isinstance(all_links, list)
    assert '#' not in all_links
    assert '' not in all_links
    # No duplicates: de-duplicating must not shrink the list.
    assert len(all_links) == len(set(all_links))
    assert all('wiki' in link for link in wiki_links)
Beispiel #3
0
def mycrawl(url):
    """Fetch ``url`` and record the regex matches found in its text.

    The response text (decoded as UTF-8) is searched with the module-level
    ``proofpattern``, ``algopattern`` and ``maxpattern``; the results are
    appended to the ``'proof'``, ``'algo'`` and ``'total'`` lists of the
    module-level ``coininfo`` mapping, and ``url`` is appended to
    ``'name'``.

    If fetching or matching fails, the traceback is logged and the string
    ``'error'`` is recorded in place of each match list, so every call adds
    exactly one entry to each of the four lists.
    """
    try:
        res = lt.send_request(url)
        res.encoding = 'utf-8'
        webcode = res.text
        coinproof = re.findall(proofpattern, webcode)
        coinalgorithm = re.findall(algopattern, webcode)
        coinmax = re.findall(maxpattern, webcode)
    except Exception:
        # Only ``Exception``: a bare ``except`` would also swallow
        # KeyboardInterrupt/SystemExit and make the crawl uninterruptible.
        logging.exception('error')
        coinproof = coinalgorithm = coinmax = 'error'
    # Append outside the ``try`` so a failure can never leave the URL
    # recorded twice or the four lists with different lengths.
    coininfo['name'].append(url)
    coininfo['algo'].append(coinalgorithm)
    coininfo['proof'].append(coinproof)
    coininfo['total'].append(coinmax)
Beispiel #4
0
def crawl(url):
    """Scrape every ``.wrap`` element on ``url`` and store one record each.

    The page is fetched with ``lt.fetch`` (with cookies and the module-level
    ``headers``), followed by a half-second pause. For each ``.wrap``
    element the view count and rating are read from the listing, the key
    after the last ``'='`` in the element's first link is used to request
    ``{domain}/embed/<key>``, and the ``flashvars`` JSON found in that
    response supplies the remaining fields. Each record is pretty-printed
    and passed to ``col.insert_one``.
    """
    tree = lt.fetch(url, use_cookies=True, headers=headers)
    time.sleep(0.5)
    for item in tree.css('.wrap'):
        views = lt.expand_num(
            item.css('span.views var::text').extract_first())
        rating_text = item.css('.value::text').extract_first()
        # The last character is dropped before conversion
        # (presumably a trailing '%' -- verify against the page markup).
        rating = int(rating_text[:-1])
        href = item.css('a::attr(href)').extract_first()
        view_key = href.split('=')[-1]
        embed_page = lt.send_request(f'{domain}/embed/{view_key}',
                                     cookies=cookies,
                                     headers=headers).text
        # First "var flashvars = ..." assignment, up to the comma that ends
        # its line; the captured text is parsed as JSON.
        flashvars = re.findall('var flashvars =(.*?),\n', embed_page)[0]
        info = json.loads(flashvars)
        data = {
            'views': views,
            'rating': rating,
            'title': info.get('video_title'),
            'duration': info.get('video_duration'),
            'image': info.get('image_url'),
            'link': info.get('link_url'),
            'quality_480p': info.get('quality_480p'),
        }
        pprint(data)
        col.insert_one(data)
Beispiel #5
0
def test_send_request():
    """Check that ``lt.send_request(domain)`` yields a 200 ``requests`` response."""
    res = lt.send_request(domain)
    # isinstance instead of an exact type comparison, so a Response
    # subclass is accepted as well.
    assert isinstance(res, requests.models.Response)
    assert res.status_code == 200
Beispiel #6
0
def test_re_links():
    """Check that ``lt.re_links`` finds more than five wiki links on ``/post``."""
    res = lt.send_request(f'{domain}/post')
    hrefs = lt.re_links(res, r'https://konachan.net/wiki/.*?')
    # Separate asserts so a failure shows which condition broke;
    # isinstance replaces the exact ``type(...) == list`` comparison.
    assert isinstance(hrefs, list)
    assert len(hrefs) > 5
Beispiel #7
0
def test_links():
    """Check that ``lt.links`` returns a list that does not contain ``'#'``."""
    res = lt.send_request(domain)
    r = lt.links(res)
    # Separate asserts so a failure shows which condition broke;
    # isinstance replaces the exact ``type(...) == list`` comparison.
    assert isinstance(r, list)
    assert '#' not in r
Beispiel #8
0
def test_get():
    """Check that requesting ``domain`` answers with HTTP status 200."""
    response = lt.send_request(domain)
    status = response.status_code
    assert status == 200
Beispiel #9
0
def test_proxies():
    """Check that a request made with ``use_proxies=True`` answers with status 200."""
    response = lt.send_request('http://httpbin.org/get', use_proxies=True)
    status = response.status_code
    assert status == 200
Beispiel #10
0
def test_absolute_links():
    """Check that absolute links contain no ``//`` besides the ``http://`` prefix."""
    res = lt.send_request('http://www.spbeen.com')
    stripped = []
    for href in lt.links(res, absolute=True):
        # Remove every 'http://' occurrence; any '//' left over is unexpected.
        stripped.append(href.replace('http://', ''))
    assert not any("//" in href for href in stripped)
Beispiel #11
0
def test_re_links():
    """Check that ``lt.re_links`` returns a list for the spbeen.com pattern."""
    res = lt.send_request('http://www.spbeen.com')
    hrefs = lt.re_links(res,'https?://www.spbeen.com//p/.*?')
    # isinstance instead of an exact type comparison, so a list subclass
    # is accepted as well.
    assert isinstance(hrefs, list)
Beispiel #12
0
def test_re_links():
    """Check that ``lt.re_links`` finds more than five wiki links on ``/post``."""
    response = lt.send_request(f'{domain}/post')
    wiki_links = lt.re_links(response, r'https://konachan.com/wiki/.*?')
    assert isinstance(wiki_links, list)
    assert len(wiki_links) > 5
Beispiel #13
0
def test_read_cookies():
    """Check that cookies read from file are echoed back by httpbin.

    Cookies loaded with ``lt.read_cookies`` are sent to
    ``http://httpbin.org/cookies``; the ``'cookies'`` entry of the JSON
    reply must equal the cookies that were sent.
    """
    cookies = lt.read_cookies(filename='./looter/examples/cookies.txt')
    response = lt.send_request('http://httpbin.org/cookies', cookies=cookies)
    sent = dict(cookies.items())
    echoed = response.json()['cookies']
    assert sent == echoed
Beispiel #14
0
def test_send_request():
    """Check that ``lt.send_request(domain)`` yields a 200 ``requests`` response."""
    response = lt.send_request(domain)
    assert isinstance(response, requests.models.Response)
    status = response.status_code
    assert status == 200