Esempio n. 1
0
def main():
    """Fetch the page at ``REQ`` and print what ``__parse`` returns for it.

    A ``Request`` is created, its ``agents`` attribute is set to two
    browser User-Agent strings (Edge 13 and Chrome 51 on Windows 10), the
    URL held in ``REQ`` is fetched with ``get``, and the ``text`` of the
    response is handed to ``__parse``.
    """
    req = Request()
    # NOTE(review): presumably Request uses one of these as the User-Agent
    # header of outgoing requests -- confirm against the Request class.
    req.agents = [
        ('Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like '
         'Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586'),
        ('Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like '
         'Gecko) Chrome/51.0.2704.103 Safari/537.36')]
    response = req.get(REQ)
    # Parenthesised so the line is valid on Python 3 as well; with a single
    # argument it prints exactly what the Python 2 statement form printed.
    print(__parse(response.text))
Esempio n. 2
0
def main():
    """Fetch one fixed blog page and print what ``__parse`` returns for it.

    A ``Request`` is created, its ``agents`` attribute is set to two
    browser User-Agent strings (Edge 13 and Chrome 51 on Windows 10), the
    hard-coded archive URL is fetched with ``get``, and the ``text`` of the
    response is handed to ``__parse``.
    """
    req = Request()
    # NOTE(review): presumably Request uses one of these as the User-Agent
    # header of outgoing requests -- confirm against the Request class.
    req.agents = [
        ('Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like '
         'Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586'),
        ('Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like '
         'Gecko) Chrome/51.0.2704.103 Safari/537.36')]
    response = req.get('http://utaukitune.ldblog.jp/archives/65696057.html')
    # Parenthesised so the line is valid on Python 3 as well; with a single
    # argument it prints exactly what the Python 2 statement form printed.
    print(__parse(response.text))
Esempio n. 3
0
def __parse(text):
    et = etree.fromstring(text, parser=etree.HTMLParser())
    for e in et.xpath("//a[@class='pstl']"):
        suffix = e.attrib['href']
        url = REQ + suffix
        req = Request()
        req.agents = [
            ('Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like '
             'Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586'),
            ('Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like '
             'Gecko) Chrome/51.0.2704.103 Safari/537.36')]
        response = req.get(url)
        et2 = etree.fromstring(response.text, parser=etree.HTMLParser())