コード例 #1
0
ファイル: xixi.py プロジェクト: starlifht/py
def getXiXi(name):
    """Scrape the film pages linked from ``domain`` and upsert them into MySQL.

    Fetches the page at ``domain``, collects every distinct
    ``/content-*.html`` link, downloads each linked page, and writes one row
    per page into the ``film`` table with the label ``'xixi'``.

    Args:
        name: Not used by this function; kept so existing callers still work.

    MySQL errors are caught and printed. Any other exception (for example an
    ``IndexError`` when a page lacks an expected element) propagates to the
    caller. The connection is closed in every case.
    """
    conn = None
    try:
        # NOTE(review): credentials are hard-coded here; consider loading
        # them from configuration instead.
        conn = MySQLdb.connect(host='172.27.2.26', user='******', passwd='asd123', db='movie', port=3306, charset="utf8")
        cur = conn.cursor()
        # ``domain`` is not defined in this block; presumably a module-level
        # base URL -- TODO confirm.
        mainHtml = getHtml(domain)
        links = set(re.findall(r'href="(/content-.*?html)"', mainHtml))
        for i in links:
            page_url = domain + i
            print(page_url)
            soup = BeautifulSoup(getHtml(page_url))
            newtitle = soup.select('.newtitle h1')[0].string
            # The rating is looked up from the first space-separated token
            # of the title.
            rating = tools.getRating(newtitle.split(' ')[0])

            # Prefix the download link and the poster image with the domain
            # before the markup is serialised for storage.
            download = soup.select('.cont_l_d_ul')[0]
            download.a['href'] = domain + str(download.a['href'])
            poster = soup.select('.infoimg')[0].img
            poster['src'] = domain + str(poster['src'])

            imdbinfo = repr(soup.select('.imdbinfo')[0])
            post_content = (newtitle.replace('-xixiHD', ''), imdbinfo + repr(download), page_url, rating)
            cur.execute("replace into film(label,title,content,origin,rating) VALUES('xixi',%s,%s,%s,%s)", post_content)
            # Autocommit is off by default, so commit each row explicitly;
            # otherwise the write is discarded on transactional engines.
            conn.commit()
            time.sleep(5)

    except MySQLdb.Error as e:
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
    finally:
        # Release the connection on both the success and the error path.
        if conn is not None:
            conn.close()
コード例 #2
0
ファイル: gaoqing.py プロジェクト: starlifht/py
def getGaoQing(name):
    """Scrape the posts listed on http://gaoqing.la/ and upsert them into MySQL.

    Fetches the front page, collects every distinct ``http://gaoqing.la/...html``
    link found inside the ``<ul id="post_container">`` element, downloads each
    linked page, and inserts one row per page into the ``film`` table with the
    label ``'gaoqing'``. On a duplicate key the row's ``content`` and
    ``datetime`` columns are updated instead.

    Args:
        name: Not used by this function; kept so existing callers still work.

    MySQL errors are caught and printed. Any other exception (for example an
    ``IndexError`` when a title contains no space) propagates to the caller.
    The connection is closed in every case.
    """
    conn = None
    try:
        # NOTE(review): credentials are hard-coded here; consider loading
        # them from configuration instead.
        conn = MySQLdb.connect(host='172.27.2.26', user='******', passwd='asd123', db='movie', port=3306, charset="utf8")
        cur = conn.cursor()
        mainsoup = BeautifulSoup(getHtml('http://gaoqing.la/'))
        post_list = str(mainsoup.find('ul', id='post_container'))
        links = set(re.findall(r'href="(http://gaoqing\.la/.*?html)"', post_list))
        for i in links:
            print(i)
            soup = BeautifulSoup(getHtml(i))
            title = str(soup.title.string).replace('中国高清网', '')
            # The rating is looked up from the second space-separated token
            # of the title.
            douban = tools.getRating(title.split(' ')[1])
            content = repr(soup.find('div', id="post_content"))
            # ``content`` appears twice: once for the INSERT values and once
            # for the ON DUPLICATE KEY UPDATE clause.
            post_content = (title, content, i, douban, content, tools.getTime())
            cur.execute("insert into film(label,title,content,origin,douban) VALUES('gaoqing',%s,%s,%s,%s) ON DUPLICATE KEY UPDATE content=%s, datetime=%s", post_content)
            # Autocommit is off by default, so commit each row explicitly;
            # otherwise the write is discarded on transactional engines.
            conn.commit()
            time.sleep(5)

    except MySQLdb.Error as e:
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
    finally:
        # Release the connection on both the success and the error path.
        if conn is not None:
            conn.close()