Ejemplo n.º 1
0
	def run(self):
		print self.city.name, "线程开始抓取"
		for i in xrange(1, 2):
			articles = []
			url = host + url_args.format(fid=str(self.city.fid), page=str(i))
			home_page = Base.get_home(url)
			article_infos = Base.get_article_infos(home_page)
			for article_info in article_infos:
				time_at = do_time.str2datetime(article_info['time_at'], "%Y-%m-%d %H:%M")
				tid = int(article_info['tid'])
				if time_at <= self.latest_time_at and article_info['_type'] != 'hot':
					Article.add_all(articles)
					print self.city.name, "结束"
					return
				else:
					if tid not in self.tids:
						print "抓取", self.city.name, i, "页", "帖子", article_info['title']
						content = Base.get_content(article_info['url'])
						articles.append(
							Article(
								city_id=self.city.id,
								tid=tid,
								type=article_info['_type'],
								title=article_info['title'],
								time_at=time_at,
								content=content,
								author=article_info['author'],
								reply_number=article_info['reply_number'],
								read_number=article_info['read_number'],
								url=article_info['url'],
							))
			Article.add_all(articles)
		print self.city.name, "结束"
Ejemplo n.º 2
0
	def __init__(self, city, latest_time_at, tids):
		"""Set up the thread with the city to process and its known state.

		city           -- object stored as ``self.city``.
		latest_time_at -- cut-off timestamp stored as ``self.latest_time_at``;
		                  any falsy value (e.g. ``None``) is replaced by
		                  1990-01-01 00:00 parsed via ``do_time.str2datetime``.
		tids           -- container stored as ``self.tids``.
		"""
		threading.Thread.__init__(self)
		self.city = city
		if latest_time_at:
			self.latest_time_at = latest_time_at
		else:
			# No cut-off supplied: fall back to a fixed early date.
			self.latest_time_at = do_time.str2datetime(
				"1990-01-01 00:00", "%Y-%m-%d %H:%M")
		self.tids = tids
Ejemplo n.º 3
0
 def mget_by_city_id_and_time(cls, city_id, time_at):
     """Return all rows of ``cls`` for a city with ``time_at`` after a cut-off.

     city_id -- value compared for equality with ``cls.city_id``.
     time_at -- exclusive lower bound for ``cls.time_at``; a falsy value is
                replaced by ``do_time.str2datetime('1994-01-01 22:22:22')``.

     Returns the result of ``.all()`` on the filtered query.
     """
     if not time_at:
         # No cut-off supplied: fall back to a fixed early date.
         time_at = do_time.str2datetime('1994-01-01 22:22:22')
     query = DBSession().query(cls)
     query = query.filter(cls.city_id == city_id)
     query = query.filter(cls.time_at > time_at)
     return query.all()