Example #1
    def getPost(self, post, new_nr=0):
        cnt = 0
        sr = urlparse.urlsplit(post.url)
        if not sr.scheme:
            posturl = post.sector.base_url + post.url
        else:
            posturl = post.url

        posturl = "%d".join(re.findall("(.*?thread-\d+-)\d+(-.*?.html)", posturl)[0])

        startpage, stoppage = self.getPageRange(self.NR_REPLY_PER_PAGE, post.nr_reply, new_nr)

        self.logger.debug("post %s page range [%d,%d)", post.locate_id, startpage, stoppage)
        parser = etree.HTMLParser()
        for pg in xrange(startpage, stoppage):
            self.logger.debug("post %s %s ...", post.locate_id, posturl % pg)
            data = self._getData(posturl % pg, None, post.title)
            if not data:
                self.logger.debug("got None or empty data, skip page %d", pg)
                continue
            # check authentication: some posts need higher permission to view
            if data.find("超级大本营军事论坛 提示信息") != -1 and data.find("您无权进行当前操作,原因如下:") != -1:
                self.logger.debug(
                    "got err %s", re.search('<div class="alert_error">(.*?)</div>', data, re.M | re.S | re.I).group(1)
                )
                if self.login():
                    data = self._getData(posturl % pg, None, post.title)
                    if not data:
                        self.logger.debug("got None or empty data after re-login, skip page %d", pg)
                        continue

            tree = etree.fromstring(data, parser)

            # each reply lives in a <div id="post_NNN"> under div#postlist; the
            # numeric suffix is the reply's locate_id on the remote forum
            posts = tree.xpath('//div[@id="postlist"]/div[starts-with(@id,"post_")]')
            for item in posts:
                replyid = item.attrib["id"][5:]
                assert replyid
                try:
                    # previously: item.xpath('./table/tr[1]/td[@class="pls"]/div[@class="pi"]/div[@class="authi"]/a/text()')[0]
                    author = item.xpath('./table//td[@class="pls"]/div[@class="pi"]/div[@class="authi"]/a/text()')[0]
                except IndexError:
                    author = item.xpath('./table//td[@class="pls"]/div[@class="pi"]/text()')[0].strip()
                assert author is not None

                ##				crt_date=item.xpath('./table/tr[1]/td[@class="postcontent"]/div[@class="postinfo"]/div[@class="posterinfo"]/div[@class="authorinfo"]/em/text()')[0][4:]
                # previously: item.xpath('./table/tr[1]/td[@class="plc"]/div[@class="pi"]/div[@class="pti"]/div[@class="authi"]/em/text()')[0][4:]
                crt_date = item.xpath('./table//div[@class="pti"]/div[@class="authi"]/em/text()')[0][4:]

                # './table/tr[1]/td[@class="plc"]/div[@class="pct"]/div[@class="pcb"]/div[@class="t_fsz"]/table/tr/td[@id="postmessage_%s"]'%replyid
                ##				replycontent= item.xpath('//td[@id="postmessage_%s"]/*[not(@class="a_pr" or @class="pstatus")]'%replyid)[0]
                try:
                    replycontent = item.xpath('//td[@id="postmessage_%s"]' % replyid)[0]
                except IndexError:
                    if item.xpath('./table//div[@class="pct"]/div[@class="pcb"]/div[@class="locked"]'):
                        replycontent = item.xpath('./table//div[@class="pct"]/div[@class="pcb"]/div[@class="locked"]')[
                            0
                        ]
                        replycontent = htmlentitydecode(etree.tostring(replycontent)).strip()
                    else:
                        raise
                else:
                    assert replycontent is not None
                    # strip ad ("a_pr") and post-status ("pstatus") elements, keeping any trailing text
                    for i in replycontent:
                        if "class" in i.attrib and i.attrib["class"] in ("a_pr", "pstatus"):
                            textlist = re.findall(
                                r"\A<%s.*?>.*?</%s>(.*?)\Z" % (i.tag, i.tag), etree.tostring(i), re.M | re.S | re.U
                            )
                            textlist = [x for x in textlist if x.strip() != ""]
                            if len(textlist) > 0:
                                remaintext = "<br />".join(textlist)
                                newelement = item.makeelement("br")
                                newelement.text = remaintext
                                replycontent.replace(i, newelement)
                            else:
                                replycontent.remove(i)
                    replycontent = self.exclude_first_td_tag.match(
                        htmlentitydecode(etree.tostring(replycontent)).strip()
                    ).group(1)

                # get-or-create keyed on (post, locate_id); count only replies that
                # this run actually created
                try:
                    r = Reply.objects.filter(post=post, locate_id=replyid)[0:1].get()
                except Reply.DoesNotExist:
                    r = Reply(post=post, locate_id=replyid, crt_date=crt_date, author=author, content=replycontent)
                    try:
                        r.save()
                    except _mysql_exceptions.Warning:
                        self.logger.debug("got _mysql_exceptions.Warning!")
                        r.content = self.exclude_first_td_tag.match(etree.tostring(item[1]).strip()[:-5]).group(1)
                        if r.content.startswith("<br/>"):
                            r.content = r.content[5:]
                        r.save()

                    cnt += 1

        if new_nr != 0:
            if post.nr_reply + cnt == new_nr:
                self.logger.info("post %s %+d reply. now %d", post.locate_id, cnt, new_nr + 1)
                post.nr_reply += cnt  # add only the actually-new replies
            else:
                self.logger.debug(
                    "post %s %+d reply, %d != expect %d (no right new_nr info?)",
                    post.locate_id,
                    cnt,
                    post.nr_reply + cnt,
                    new_nr,
                )
                # check how many replies were actually stored
                actualcnt = Reply.objects.filter(post=post).count()
                self.logger.info("post %s actual %d reply in DB", post.locate_id, actualcnt)
                post.nr_reply = actualcnt - 1 if actualcnt - 1 >= 0 else 0
        else:
            if post.nr_reply + 1 == cnt:
                self.logger.info("post %s init %+d reply.", post.locate_id, cnt)
            else:
                self.logger.info("post %s init %+d reply, != expect %d", post.locate_id, cnt, post.nr_reply + 1)
            post.nr_reply = cnt - 1 if cnt - 1 >= 0 else 0  # set to actual count - 1 so the next run re-checks for new replies
        post.save()
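
getPageRange is referenced throughout these examples but never shown. Below is a minimal sketch of what a helper with this signature plausibly computes, assuming 1-based page numbers and nr_per_page replies per page; a hypothetical reconstruction, not the author's implementation:

    def getPageRange(self, nr_per_page, nr_reply, new_nr=0):
        '''Map reply counts to a half-open [startpage, stoppage) page range (hypothetical).'''
        if new_nr == 0:
            # initial fetch: walk every page the known reply count spans
            return 1, nr_reply // nr_per_page + 2
        # incremental fetch: resume at the page holding the first missing reply,
        # stop after the page holding the last expected one
        startpage = nr_reply // nr_per_page + 1
        stoppage = (new_nr - 1) // nr_per_page + 2
        return startpage, stoppage
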
Example #2
	def getPost(self,post,new_nr=0):
		'''Fetch the replies of a post; the paging range is derived from the reply count shown in the topic list.'''
		cnt=0
		sr=urlparse.urlsplit(post.url)
		if not sr.scheme:
			posturl=post.sector.base_url+post.url
		else:
			posturl=post.url

		posturl+='&TopicPage=%d'
		posturl=posturl.replace('topicdisplay.asp','topicdisplay_safe.asp')

		startpage,stoppage=self.getPageRange(self.NR_REPLY_PER_PAGE,0 if post.nr_reply-1<0 else post.nr_reply-1,0 if new_nr==0 else new_nr-1)

		self.logger.debug('post %s page range [%d,%d)',post.locate_id,startpage,stoppage)
		parser=etree.HTMLParser()
		for pg in xrange(startpage,stoppage):
			if self.exitevent.is_set():
				self.logger.info('got exit signal!')
				break
			self.logger.debug('post %s %s ...',post.locate_id,posturl%pg)
			data=self._getData(posturl%pg,None,post.title)

			tree=etree.fromstring(data,parser)

			# each row after the header row of the first table.maintable is one post:
			# cell 0 holds the author link, cell 1 the body and metadata
			posts=tree.xpath('//table[@class="maintable"][1]/tr[position()>1]')
			for item in posts:
				try:
					author=item[0].xpath('a/text()')[0]
				except IndexError:
					self.logger.debug('no author info?')
					author=''

					if '浏览的页面或提交的数据包含敏感关键词信息,该关键词已经被过滤' in htmlentitydecode(etree.tostring(item)):
						self.logger.info('page contains filtered keywords! %s', htmlentitydecode(etree.tostring(item)))
						continue

				try:
					crt_date=item[1].xpath('font/text()')[0][6:-1]
					# detect the topic (first) post, which has no replyid of its own
					tmp=item[1].xpath('*[position()<5]')
					if len(tmp)==4 and [x.tag for x in tmp]==['b','hr','font','hr']: # topic post
						replyid=0
						try:
							realtitle=item[1].xpath('b/text()')[0]
						except IndexError:
							try:
								realtitle=item[1].xpath('b/font/b/text()')[0]
							except IndexError:
								realtitle=item[1].xpath('b/font/text()')[0]

						if post.title!=realtitle:
							self.logger.info('post %s realtitle %s',post.locate_id,realtitle)
							post.title=realtitle
							post.save()
						# strip non-reply elements (title, divider, post time) so only the reply body remains
						for x in item[1].xpath('*[position()<5]'):
							item[1].remove(x)
						item[1].text=''
					else: # a reply, not the topic post
						replyurl=item[1].xpath('font/a[1]/@href')[0]
						replyid=re.search(r'ReplyID=(\d+)',replyurl).group(1)
						# strip non-reply elements (divider, post time) so only the reply body remains
						for x in item[1].xpath('*[position()<3]'):
							item[1].remove(x)

					replycontent=self.exclude_first_td_tag.match(htmlentitydecode(etree.tostring(item[1])).strip()).group(1)
					if replycontent.startswith('<br/>'):
						replycontent=replycontent[5:]

					if author=='':
						author='usr for %d-%s'%(post.id,replyid)

	##					open('/home/kevin/tmp_post.txt','w').write(replycontent)
					try:
						r=Reply.objects.filter(post=post,locate_id=replyid)[0:1].get()
					except Reply.DoesNotExist:
						r=Reply(post=post,locate_id=replyid,crt_date=crt_date,author=author,content=replycontent)
						try:
							r.save()
						except _mysql_exceptions.Warning:
							self.logger.debug('got _mysql_exceptions.Warning!')
							r.content=self.exclude_first_td_tag.match(etree.tostring(item[1]).strip()[:-5]).group(1)
							if r.content.startswith('<br/>'):
								r.content=r.content[5:]
							r.save()

						cnt+=1
				except IndexError:
					if '浏览的页面或提交的数据包含敏感关键词信息,该关键词已经被过滤' in htmlentitydecode(etree.tostring(item)):
						self.logger.info('page contains filtered keywords! %s', htmlentitydecode(etree.tostring(item)))
						continue
					else:
						raise
				except AttributeError:
					if '浏览的页面或提交的数据包含敏感关键词信息,该关键词已经被过滤' in htmlentitydecode(etree.tostring(item)):
						self.logger.info('page contains filtered keywords! %s', htmlentitydecode(etree.tostring(item)))
						continue
					else:
						raise


		if new_nr!=0:
			if post.nr_reply+cnt==new_nr:
				self.logger.info('post %s %+d reply. now %d',post.locate_id,cnt,new_nr+1)
				post.nr_reply+=cnt # add only the actually-new replies
			else:
				self.logger.debug('post %s %+d reply, %d != expect %d (no right new_nr info?)',post.locate_id,cnt,post.nr_reply+cnt,new_nr)
				# check how many replies were actually stored
				actualcnt=Reply.objects.filter(post=post).count()
				self.logger.info('post %s actual %d reply in DB',post.locate_id,actualcnt)
				post.nr_reply=actualcnt-1 if actualcnt-1>=0 else 0
		else:
			if post.nr_reply+1==cnt:
				self.logger.info('post %s init %+d reply.',post.locate_id,cnt)
			else:
				self.logger.info('post %s init %+d reply, != expect %d',post.locate_id,cnt,post.nr_reply+1)
			post.nr_reply=cnt-1 if cnt-1>=0 else 0 # set to actual count - 1 so the next run re-checks for new replies
		post.save()
		self.stat_reply_add+=cnt
	def getOnePostSmart(self,post,from_page=1):
		'''Fetch the replies of a post; paging stops when the "next page" link disappears, which is more accurate than deriving the range from the reply count shown in the topic list.'''
		cnt=0
		sr=urlparse.urlsplit(post.url)
		if not sr.scheme:
			posturl=post.sector.base_url+post.url
		else:
			posturl=post.url

		posturl+='&TopicPage=%d'
		posturl=posturl.replace('topicdisplay.asp','topicdisplay_safe.asp')

		parser=etree.HTMLParser()
		pg=from_page
		while True:
			if self.exitevent.is_set():
				self.logger.info('got exit signal!')
				break
			self.logger.debug('post %s %s ...',post.locate_id,posturl%pg)
			data=self._getData(posturl%pg,None,post.title)

			tree=etree.fromstring(data,parser)

			posts=tree.xpath('//table[@class="maintable"][1]/tr[position()>1]')
			haspostinpage=False
			gotfilteredword=False
			for item in posts:
				haspostinpage=True
				try:
					author=item[0].xpath('a/text()')[0]
				except IndexError:
					self.logger.debug('no author info?')
					author=''

					if '浏览的页面或提交的数据包含敏感关键词信息,该关键词已经被过滤' in htmlentitydecode(etree.tostring(item)):
						self.logger.info('page contains filtered keywords! %s', htmlentitydecode(etree.tostring(item)))
						gotfilteredword=True
						continue

				try:
					crt_date=item[1].xpath('font/text()')[0][6:-1]
					# detect the topic (first) post, which has no replyid of its own
					tmp=item[1].xpath('*[position()<5]')
					if len(tmp)==4 and [x.tag for x in tmp]==['b','hr','font','hr']: # topic post
						replyid=0
						try:
							realtitle=item[1].xpath('b/text()')[0]
						except IndexError:
							try:
								realtitle=item[1].xpath('b/font/b/text()')[0]
							except IndexError:
								realtitle=item[1].xpath('b/font/text()')[0]

						if post.title!=realtitle:
							self.logger.debug('post %s realtitle %s',post.locate_id,realtitle)
							post.title=realtitle
						# strip non-reply elements (title, divider, post time) so only the reply body remains
						for x in item[1].xpath('*[position()<5]'):
							item[1].remove(x)
						item[1].text=''
					else: # a reply, not the topic post
						replyurl=item[1].xpath('font/a[1]/@href')[0]
						replyid=re.search(r'ReplyID=(\d+)',replyurl).group(1)
						# strip non-reply elements (divider, post time) so only the reply body remains
						for x in item[1].xpath('*[position()<3]'):
							item[1].remove(x)

					replycontent=self.exclude_first_td_tag.match(htmlentitydecode(etree.tostring(item[1])).strip()).group(1)
					if replycontent.startswith('<br/>'):
						replycontent=replycontent[5:]

					if author=='':
						author='usr for %d-%s'%(post.id,replyid)

	##					open('/home/kevin/tmp_post.txt','w').write(replycontent)
					try:
						r=Reply.objects.filter(post=post,locate_id=replyid)[0:1].get()
					except Reply.DoesNotExist:
						r=Reply(post=post,locate_id=replyid,crt_date=crt_date,author=author,content=replycontent)
						try:
							r.save()
						except _mysql_exceptions.Warning:
							self.logger.debug('got _mysql_exceptions.Warning!')
							r.content=self.exclude_first_td_tag.match(etree.tostring(item[1]).strip()[:-5]).group(1)
							if r.content.startswith('<br/>'):
								r.content=r.content[5:]
							r.save()

						cnt+=1
				except IndexError:
					if '浏览的页面或提交的数据包含敏感关键词信息,该关键词已经被过滤' in htmlentitydecode(etree.tostring(item)):
						self.logger.info('page contains filtered keywords! %s', htmlentitydecode(etree.tostring(item)))
						gotfilteredword=True
						continue
					else:
						raise
				except AttributeError:
					if '浏览的页面或提交的数据包含敏感关键词信息,该关键词已经被过滤' in htmlentitydecode(etree.tostring(item)):
						self.logger.info('page contains filtered keywords! %s', htmlentitydecode(etree.tostring(item)))
						gotfilteredword=True
						continue
					else:
						raise

			# keep paging while a "[>]" next-page link exists and this page yielded
			# posts, or while a filtered-keyword page forced us to skip its content
			x=tree.xpath('//td[@class="outtd"][1]/table[2]/tr[1]/td[2]/a')
			if ('[>]' in [t.text for t in x]) and haspostinpage:
				pg+=1
			elif gotfilteredword:
				pg+=1
			else:
				break


		self.logger.debug('post %s %+d reply',post.locate_id,cnt)
		# check how many replies were actually stored
		actualcnt=Reply.objects.filter(post=post).count()
		self.logger.debug('post %s actual %d reply in DB',post.locate_id,actualcnt)
		post.nr_reply=actualcnt-1 if actualcnt-1>=0 else 0
		post.save()
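
htmlentitydecode is another helper these examples rely on but do not define. Assuming it only maps HTML entities in the serialized markup back to characters, a Python 2 sketch (hypothetical; the author's version may differ):

	import re
	from htmlentitydefs import name2codepoint

	def htmlentitydecode(s):
		# named entities (&amp;, &nbsp;, ...) become the corresponding character
		s = re.sub(r'&(%s);' % '|'.join(name2codepoint),
			lambda m: unichr(name2codepoint[m.group(1)]), s)
		# numeric character references, decimal and hex forms
		s = re.sub(r'&#(\d+);', lambda m: unichr(int(m.group(1))), s)
		s = re.sub(r'&#[xX]([0-9a-fA-F]+);', lambda m: unichr(int(m.group(1), 16)), s)
		return s
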
Example #3
	def getPost(self,post,new_nr=0):
		cnt=0
		sr=urlparse.urlsplit(post.url)
		if not sr.scheme:
##			posturl=urlparse.urljoin(post.sector.base_url,post.url)
			posturl=urlparse.urljoin(post.sector.base_url, 'showtopic.aspx?topicid=%s&page=%%d'%post.locate_id)
		else:
			posturl=post.url

		startpage,stoppage=self.getPageRange(self.NR_REPLY_PER_PAGE,post.nr_reply,new_nr)

		self.logger.debug('page range [%d,%d) for post %s',startpage,stoppage,post.locate_id)
		parser=etree.HTMLParser()
		for pg in xrange(startpage,stoppage):
			self.logger.debug('post %s %s ...',post.locate_id,posturl%pg)
			data=self._getData(posturl%pg,None,post.title)
			tree=etree.fromstring(data,parser)

			# every <table> under div#postsContainer is one post; the table whose
			# class is "plh" is the page header carrying the real topic title
			pl=tree.xpath('//div[@id="postsContainer"]/table')

			for p in pl:
				if p.xpath('@class') and p.xpath('@class')[0]=='plh':
					realtitle=p.xpath('./tbody/tr/td[@class="posttopic"]/h1[@class="ts z"]/span/text()')[0]
					if realtitle and post.title!=realtitle :
						self.logger.debug('realtitle for post %s|%s',post.locate_id,realtitle)
						post.title=realtitle

					continue

				author=p.xpath('./tbody[1]/tr[1]/td[@class="postauthor"]/div[@class="poster"]/span/text()')[0]
				replyid=p.xpath('./@id')[0]
				if p.xpath('//div[@id="message%s"]/div[@id="firstpost"]'%replyid):
					replycontent=htmlentitydecode(etree.tostring(p.xpath('//div[@id="message%s"]/div[@id="firstpost"]'%replyid)[0])).strip()
				else:
					replycontent=htmlentitydecode(etree.tostring(p.xpath('//div[@id="message%s"]'%replyid)[0])).strip()
				replycontent=self.exclude_first_div_tag.match(replycontent).group(1).strip()

##				crt_date=p.xpath('./tbody[1]/tr[1]/td[@class="postcontent"]/div[@class="pi"]/div[@class="postinfo"]/em/span/@title')[0]
				crt_date=p.xpath('//div[@class="postinfo"]/em/span/@title')[0]

				try:
					r=Reply.objects.filter(post=post, author=author, locate_id=replyid)[0:1].get()
				except Reply.DoesNotExist:
					r=Reply(post=post,locate_id=replyid,crt_date=crt_date,author=author,content=replycontent)
					r.save()
					cnt+=1

##		debug('post %s add %d reply.',post.locate_id,cnt)
		if new_nr!=0:
			if post.nr_reply+cnt==new_nr:
				self.logger.info('post %s %+d reply. now %d',post.locate_id,cnt,new_nr+1)
				post.nr_reply+=cnt # add only the actually-new replies
			else:
				self.logger.debug('post %s %+d reply, %d != expect %d',post.locate_id,cnt,post.nr_reply+cnt,new_nr)
				# check how many replies were actually stored
				actualcnt=Reply.objects.filter(post=post).count()
				self.logger.info('post %s actual %d reply in DB',post.locate_id,actualcnt)
				post.nr_reply=actualcnt-1 if actualcnt-1>=0 else 0
		else:
			if post.nr_reply+1==cnt:
				self.logger.info('post %s init %+d reply.',post.locate_id,cnt)
			else:
				self.logger.info('post %s init %+d reply, != expect %d',post.locate_id,cnt,post.nr_reply+1)
			post.nr_reply=cnt-1 if cnt-1>=0 else 0 # set to actual count - 1 so the next run re-checks for new replies
		post.save()
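
The precompiled patterns exclude_first_td_tag and exclude_first_div_tag are not shown either. Given how they are used (match the serialized element, keep group(1) as its inner HTML), they presumably look roughly like the following; the optional closing tag also covers the fallback path that slices five characters off the end before matching. An assumption, not the original definitions:

	import re

	# strip the outermost <td ...> / <div ...> wrapper that etree.tostring()
	# leaves around the element, keeping only the inner markup
	exclude_first_td_tag = re.compile(r'\A<td[^>]*>(.*?)(?:</td>)?\s*\Z', re.S)
	exclude_first_div_tag = re.compile(r'\A<div[^>]*>(.*?)(?:</div>)?\s*\Z', re.S)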