def getResponse(host, movie_url):
    """Fetch ``movie_url`` through a proxy, retrying until an attempt succeeds.

    Every attempt obtains a proxy from ``getProxy()``, issues the request via
    ``HttpRequest.httpRequest`` and increments the module-level ``number``
    counter. An attempt counts as failed when the returned object's
    ``_request`` attribute is falsy.

    Args:
        host: Passed through unchanged as the first argument of
            ``HttpRequest.httpRequest``.
        movie_url: Passed as the ``url`` keyword argument.

    Returns:
        The first response object whose ``_request`` attribute is truthy.

    Note:
        There is no retry limit; the loop runs until an attempt succeeds.
    """
    global number
    # Iterate rather than recurse so the successful response is always handed
    # back to the caller (a recursive retry without ``return`` yields None)
    # and a long run of failures cannot exhaust the recursion limit.
    while True:
        # A proxy is requested anew for every attempt.
        _proxy = getProxy()
        response = HttpRequest.httpRequest(host, url=movie_url, proxy=_proxy)
        number += 1
        if response._request:
            return response
import os, sys

from util import HttpRequest
from util import Configs
from util import MysqlClient
from bs4 import BeautifulSoup

# Python 2 idiom: re-import ``sys`` so ``setdefaultencoding`` can be called,
# then make UTF-8 the process-wide default string encoding.
reload(sys)
sys.setdefaultencoding('utf8')

# Project configuration object (not referenced again in the visible code).
config = Configs.Configs()

# Shared database handle used by saveDb() / queryDb() below.
db = MysqlClient.MysqlDB()

# Request the cn-proxy.com front page at import time.
request = HttpRequest.httpRequest("cn-proxy.com", "http://cn-proxy.com")

# Parse the response body text; no parser is named, so BeautifulSoup picks
# its default.
html_code = BeautifulSoup(request.response().text)

def saveDb(sqls):
    """Pass ``sqls`` to ``execute`` on the module-level ``db`` handle.

    Returns whatever ``db.execute`` returns.
    """
    outcome = db.execute(sqls)
    return outcome

def queryDb(sqls):
    """Pass ``sqls`` to ``queryFirst`` on the module-level ``db`` handle.

    Returns whatever ``db.queryFirst`` returns.
    """
    first_match = db.queryFirst(sqls)
    return first_match

for tables in html_code.find_all('table', class_="sortable"):
    for trs in tables.find_all('tr'):
        tds = []
        n = 0
        for td in trs.select("td"):
            try:
Example #3
0
<p class="tt cl">
    <span>2015/07/31</span>
    <a href="/subject/26978.html" target="_blank">
        <b>
            <font color="#FF6600">小时代4:灵魂尽头迅雷下载<i>/小时代4:灵魂尽头</i>.2015</font>
        </b>
    </a>
</p>
<p>又名:<a href="/subject/26978.html" target="_blank">小时代4/Tiny Time 4.0</a></p>
<p class="des">2015(中国大陆)<em>/</em>杨幂<em>/</em>郭采洁<em>/</em>陈学冬<em>/</em>郭碧婷<em>/</em>谢依霖<em>/</em>李贤宰<em>/</em>锦荣<em>/</em>任言恺<em>/</em>姜潮<em>/</em>王琳<em>/</em>商侃<em>/</em>郭敬明</p>
<p class="rt">豆瓣评分:<strong>4</strong><em class="dian">.</em><em class="fm">7</em>  <a href="/jumpto.php?aid=26978" rel="nofollow" target="_blank" title="去豆瓣查看影片介绍"><em class="e_db"></em></a></p>
'''

# Python 2 idiom: re-import ``sys`` so ``setdefaultencoding`` can be called,
# then make UTF-8 the process-wide default string encoding.
reload(sys)
sys.setdefaultencoding('utf8')

# Site whose paginated listing is crawled below.
host = "www.bttiantang.com"

# Visit listing pages 1 through 686 (the upper bound of range() is exclusive).
for p in range(1, 687):
    response = HttpRequest.httpRequest(host, url="http://"+host+"/?PageNo="+str(p))

    # Parse the page body; no parser is named, so BeautifulSoup picks its default.
    html_code = BeautifulSoup(response.response().text)

    # Each <div class="item"> is searched for ".tt" elements that are direct
    # children of a ".title" element.
    for i in html_code.find_all('div', class_="item"):
        for j in i.select(".title > .tt"):
            try:
                # Build an absolute URL from the href of the first <a> inside
                # the matched element.
                http_path = "http://"+host+j.a['href']
                # NOTE(review): ``.delay`` looks like an asynchronous task
                # dispatch (Celery-style) -- confirm. ``spider_movie`` is not
                # imported in the visible part of this script.
                spider_movie.SpiderMovies.delay(host, str(http_path))

            # Best effort: any failure for one entry is printed together with
            # the offending element, and the crawl continues.
            except Exception, e:
                print e,j
Example #4
0
Created on PyCharm  

@author: Edison

@date: 15/8/6  下午4:31

@summary: 

@note: 

@version: 
'''
__author__ = 'Edison'

import os,sys
import spider_movie

from util import HttpRequest
from bs4 import BeautifulSoup

reload(sys)
sys.setdefaultencoding('utf8')

host = "rrmj.tv"

response = HttpRequest.httpRequest(host)

html_code = BeautifulSoup(response.response())

for i in html_code.select("#menu3"):
    print i.href