def getXiXi(name):
    """Scrape the xixi index page and upsert each linked film into ``film``.

    The index page at the module-level ``domain`` is fetched with
    ``getHtml``; every distinct ``/content-*.html`` link found in it is
    then fetched and parsed.  For each page the title, the ``.imdbinfo``
    block and the ``.cont_l_d_ul`` download list are stored with a
    ``REPLACE INTO film`` statement under the label ``'xixi'``.

    Args:
        name: Accepted for interface compatibility; not used by the body.

    Returns:
        None.  MySQL errors are printed rather than raised; any other
        exception (e.g. a page missing an expected element) propagates.
    """
    # NOTE(review): database credentials are hard-coded here; consider
    # moving them to configuration.
    conn = None
    cur = None
    try:
        conn = MySQLdb.connect(host='172.27.2.26', user='******', passwd='asd123', db='movie', port=3306, charset="utf8")
        cur = conn.cursor()
        mainHtml = getHtml(domain)
        # A set drops links that appear more than once on the index page.
        links = set(re.findall(r'href="(/content-.*?html)"', mainHtml))
        for path in links:
            url = domain + path
            print(url)
            soup = BeautifulSoup(getHtml(url))
            newtitle = soup.select('.newtitle h1')[0].string
            # The rating is looked up from the first space-separated token
            # of the page title.
            rating = tools.getRating(newtitle.split(' ')[0])
            # Prefix the download link and the poster image with the site
            # domain before the markup is serialised for storage.
            download = soup.select('.cont_l_d_ul')[0]
            download.a['href'] = domain + str(download.a['href'])
            poster = soup.select('.infoimg')[0].img
            poster['src'] = domain + str(poster['src'])
            imdbinfo = repr(soup.select('.imdbinfo')[0])
            post_content = (newtitle.replace('-xixiHD', ''), imdbinfo + repr(download), url, rating)
            cur.execute("replace into film(label,title,content,origin,rating) VALUES('xixi',%s,%s,%s,%s)", post_content)
            # MySQLdb leaves autocommit off, so commit each row explicitly;
            # committing per row keeps earlier rows if a later page fails.
            conn.commit()
            # Pause between page fetches.
            time.sleep(5)
    except MySQLdb.Error as e:
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
    finally:
        # Release the cursor and connection on every exit path.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()
def getGaoQing(name):
    """Scrape the gaoqing.la front page and upsert each linked post into ``film``.

    The front page is fetched with ``getHtml``; article links are taken
    only from the ``<ul id="post_container">`` element.  Each distinct
    link is fetched and its title and ``<div id="post_content">`` markup
    are written with ``INSERT ... ON DUPLICATE KEY UPDATE`` under the
    label ``'gaoqing'``; on a duplicate key the content and ``datetime``
    columns are refreshed.

    Args:
        name: Accepted for interface compatibility; not used by the body.

    Returns:
        None.  MySQL errors are printed rather than raised; any other
        exception (e.g. a title without a second space-separated token)
        propagates.
    """
    # NOTE(review): database credentials are hard-coded here; consider
    # moving them to configuration.
    conn = None
    cur = None
    try:
        conn = MySQLdb.connect(host='172.27.2.26', user='******', passwd='asd123', db='movie', port=3306, charset="utf8")
        cur = conn.cursor()
        mainHtml = getHtml('http://gaoqing.la/')
        mainsoup = BeautifulSoup(mainHtml)
        # Only links inside the post container are considered; a set drops
        # links that appear more than once.
        container = str(mainsoup.find('ul', id='post_container'))
        links = set(re.findall(r'href="(http://gaoqing\.la/.*?html)"', container))
        for url in links:
            print(url)
            soup = BeautifulSoup(getHtml(url))
            # Strip the site name from the page title.
            title = str(soup.title.string).replace('中国高清网', '')
            # The rating is looked up from the second space-separated token
            # of the title.
            douban = tools.getRating(title.split(' ')[1])
            content = repr(soup.find('div', id="post_content"))
            # content appears twice: once for the INSERT values and once
            # for the ON DUPLICATE KEY UPDATE clause.
            post_content = (title, content, url, douban, content, tools.getTime())
            cur.execute("insert into film(label,title,content,origin,douban) VALUES('gaoqing',%s,%s,%s,%s) ON DUPLICATE KEY UPDATE content=%s, datetime=%s", post_content)
            # MySQLdb leaves autocommit off, so commit each row explicitly;
            # committing per row keeps earlier rows if a later page fails.
            conn.commit()
            # Pause between page fetches.
            time.sleep(5)
    except MySQLdb.Error as e:
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
    finally:
        # Release the cursor and connection on every exit path.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()