Example #1
0
	def run(self):
		"""Crawl the listing page at ``self.url`` and save each thread's download entry.

		The listing page is scanned for thread links; every thread page is then
		fetched, and when it contains an anchor whose tag mentions ``rmdown`` the
		anchor text and the page ``<title>`` are stored through ``Url_Link``.

		Failures on one thread page are printed and do not stop the crawl. The
		method pauses one second after every page, whether it succeeded or not,
		so a run of failing pages cannot turn into an unthrottled request burst.

		NOTE(review): ``self.get_content`` is assumed to return an iterable of
		the regex's captured groups, and ``self.get_src_content`` the raw page
		source as a GBK-encoded byte string -- confirm against their definitions.
		"""
		# A parenthesised single-argument print emits the same text on Python 2.
		print(self.url)

		# These patterns never change, so compile them once rather than per page.
		link_pattern = re.compile('<h3><a href="(.*?)html" target="_blank" id="">')
		download_pattern = re.compile('<a target="_blank" .*?rmdown.*?>(.*?)</a>')
		title_pattern = re.compile('<title>(.*?)</title>')
		page_url_prefix = 'http://t66y.com/'

		# First extract the links to the individual thread pages.
		page_url_href = self.get_content(self.url, link_pattern)

		# Then fetch each thread page in turn.
		for item in page_url_href:
			# The capture group stops right before "html", so re-append it.
			page_url = page_url_prefix + item + 'html'

			try:
				download_content = self.get_src_content(page_url)
				download_url = download_pattern.findall(download_content)

				if download_url:
					# Text of the first matching download anchor.
					download_url_link = download_url[0]
					print(download_url_link)
					# Page title; a page without <title> raises IndexError,
					# which is reported by the handler below.
					title_content = title_pattern.findall(download_content)[0]
					title_content = title_content.decode('gbk').encode('utf-8')
					print(title_content)
					if download_url_link != '':
						url_link = Url_Link(title_content, download_url_link)
						url_link.save()
			except Exception as e:
				# Per-page boundary: report the problem and move on to the next page.
				print('Run Error '+str(e)+'!')
			finally:
				# Throttle on every iteration, including the failing ones.
				time.sleep(1)
Example #2
0
def delete():
    """Delete the ``Url_Link`` entries whose ``name`` is ``"delete"``.

    NOTE(review): ``Url_Link.objects(...)`` looks like a MongoEngine-style
    queryset -- confirm against the model definition.
    """
    matching = Url_Link.objects(name="delete")
    matching.delete()
Example #3
0
def update():
    """Set ``link`` to the 163.com URL on ``Url_Link`` entries named ``"hello"``.

    NOTE(review): ``Url_Link.objects(...)`` looks like a MongoEngine-style
    queryset -- confirm against the model definition.
    """
    matching = Url_Link.objects(name="hello")
    matching.update(link="http://www.163.com")
Example #4
0
def save_url():
    """Create a ``Url_Link`` named ``"hello"`` pointing at baidu.com and save it."""
    Url_Link(name="hello", link="http://www.baidu.com").save()