コード例 #1
0
ファイル: getallpage.py プロジェクト: golgq/1024VideoSpider
	def get_src_content(self,url):
		"""Download ``url`` and return the raw response body.

		The request carries a 'user-agent' header obtained from
		``randomUA.random_ua()``; no cookie is sent.

		:param url: address of the page to fetch.
		:returns: the bytes read from the response, or ``''`` when anything
			goes wrong (the error is printed, never raised).
		"""
		try:
			request = urllib2.Request(url,headers={'user-agent':randomUA.random_ua()})
			response = urllib2.urlopen(request)
			try:
				return response.read()
			finally:
				# Always release the connection, even when read() fails.
				response.close()
		except Exception as e:
			# Best effort: callers get an empty string instead of an exception.
			print('Get content Error '+str(e)+'!')
			return ''
コード例 #2
0
ファイル: 1024.py プロジェクト: golgq/1024VideoSpider
def _fetch_download_page(url,user_agent,cookie_value):
	"""GET the download page for ``url`` and return ``(page, cookie)``.

	``page`` is the raw response body. ``cookie`` is ``'name=value'`` for the
	first cookie the response stored in the cookie jar, or ``None`` when the
	jar is empty. ``cookie_value`` (if not ``None``) is sent as the 'Cookie'
	header. Network errors propagate to the caller.
	"""
	request = urllib2.Request(url)
	request.add_header('User-Agent',user_agent)
	# NOTE(review): 'Refer' looks like a typo for 'Referer'; kept unchanged so
	# the request on the wire stays what the site has been receiving - confirm.
	request.add_header('Refer',url)
	request.add_header('Upgrade-Insecure-Requests','1')
	request.add_header('Accept','text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8')
	if cookie_value is not None:
		request.add_header('Cookie',cookie_value)
	# Cookie processor: captures whatever cookies the server sets.
	jar = cookielib.CookieJar()
	opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
	# opener.open() accepts a Request object just like urllib2.urlopen().
	response = opener.open(request,timeout=30)
	try:
		page = response.read()
	finally:
		response.close()
	first_cookie = None
	for item in jar:
		first_cookie = item.name+'='+item.value
		break
	return page,first_cookie


def _build_multipart_payload(boundary,hashvalue,reff):
	"""Return the multipart/form-data body with fields ref, reff and submit."""
	fields = (('ref',hashvalue),('reff',reff),('submit','download'))
	chunks = []
	for name,value in fields:
		chunks.append('--'+boundary+'\r\n'+'Content-Disposition: form-data; name="'+name+'"'+'\r\n\r\n'+value+'\r\n')
	# Closing delimiter.
	chunks.append('--'+boundary+'--'+'\r\n')
	return ''.join(chunks)


def _save_torrents(torrent_list,output_dir):
	"""Gunzip every buffer in ``torrent_list`` and write it under ``output_dir``."""
	for item in torrent_list:
		try:
			decode = gzip.GzipFile(fileobj = item).read()
		except (IOError,EOFError) as e:
			# One bad response must not stop the remaining torrents being saved.
			print('Skip torrent that is not valid gzip: '+str(e))
			continue
		# rand() is defined elsewhere in the project; it supplies the file name.
		with open(output_dir+rand('1')+'.torrent','wb') as torrent_fs:
			torrent_fs.write(decode)


def download_start(list_path='1024.txt',output_dir='F:\\1024torrent\\',max_retries=3):
	"""Download the torrent behind every URL listed in ``list_path``.

	For each URL the download page is fetched to read the hidden ``reff``
	form value (and, until one has been obtained, a cookie), then a
	multipart POST built via ``post_request`` retrieves the gzip-compressed
	torrent. All torrents are kept in memory and written to ``output_dir``
	once every URL has been processed.

	A URL whose page cannot be fetched, has no ``reff`` value, or has no
	``=`` in it is reported and skipped. A POST that times out is retried
	after a 30 second pause.

	:param list_path: text file with one download URL per line.
	:param output_dir: directory prefix (including trailing separator) that
		the ``.torrent`` files are written to.
	:param max_retries: how many times a URL is re-queued after a POST
		timeout; ``None`` retries without limit.
	"""
	from collections import deque

	# The list file stores all download links; drop line endings and blanks.
	with open(list_path,'r') as fs:
		pending = deque(line.strip() for line in fs if line.strip())
	reff_pattern = re.compile('<INPUT .*?reff.*?value="(.*?)">')
	attempts = {}
	cookie_value = None
	torrent_list = []
	while pending:
		rmdownloadurl = pending.popleft()
		print(rmdownloadurl)
		url_parts = rmdownloadurl.split('=')
		if len(url_parts) < 2:
			print('Skip malformed url: '+rmdownloadurl)
			continue
		hashvalue = url_parts[1]
		random_ua = randomUA.random_ua()

		try:
			downloadpage,fresh_cookie = _fetch_download_page(rmdownloadurl,random_ua,cookie_value)
		except socket.timeout:
			print('-----------------------------------------')
			continue
		except Exception as e:
			print('-----------------Game over--------------- '+str(e))
			continue
		if cookie_value is None:
			# Keep the first cookie ever received and reuse it from then on.
			cookie_value = fresh_cookie
		# Read the reff form value needed by the POST.
		found = reff_pattern.search(downloadpage)
		if found is None:
			print('-----------------Game over---------------')
			continue
		reff = found.group(1)

		# Send the POST request.
		post_req = post_request(rmdownloadurl,random_ua)
		# The boundary depends on the browser the user agent pretends to be.
		boundary = UA_Boundary[random_ua]+rand(random_ua)
		post_req.add_header('Content-type','multipart/form-data; boundary='+boundary)
		if cookie_value is not None:
			post_req.add_header('Cookie',cookie_value)
		post_req.add_data(_build_multipart_payload(boundary,hashvalue,reff))
		try:
			req = urllib2.urlopen(post_req,timeout=30)
			try:
				# Original author's note: StringIO fails here, hence BytesIO.
				torrent_list.append(BytesIO(req.read()))
			finally:
				req.close()
		except socket.timeout:
			time.sleep(30)
			print('-----------------------------------------')
			attempts[rmdownloadurl] = attempts.get(rmdownloadurl,0)+1
			if max_retries is None or attempts[rmdownloadurl] <= max_retries:
				pending.append(rmdownloadurl)
		except Exception as e:
			print('boom! '+str(e))
		print(len(torrent_list))

	_save_torrents(torrent_list,output_dir)