Example #1
 def get(self):
     newsHtmlList = Contents.all().filter("status =", "2").fetch(1)
     if newsHtmlList:
         try:
             r = newsHtmlList[0].rss
         except Exception:
             # The RSS reference can no longer be resolved; mark the item so the
             # cleanup job picks it up instead of processing it again.
             # (Calling put() to persist the status change is an assumption.)
             newsHtmlList[0].status = "1"
             newsHtmlList[0].put()
             # db.delete(newsHtmlList)
             logging.error("dropping one news item: no rss reference")
             return
         # Next comes processing the raw material. This is the first version of
         # the stripping routine:
         # 1. Find all of the content between <p></p> tags.
         #    (From what I have seen, the sites that publish news RSS feeds are
         #    large sites that keep their HTML optimized, so the news HTML is
         #    quite simple. That is why I think this approach is workable.)
         newsHtmlList = Contents.all().filter("rss =", newsHtmlList[0].rss).filter("status =", "2").fetch(20)
         oldHtmlList = memcache.get("oldhtmllist" + str(newsHtmlList[0].rss.key().id()))
         if not oldHtmlList:
             oldHtmlList = Contents.all().filter("rss =", newsHtmlList[0].rss).filter("status >", "2").fetch(10)
             try:
                 memcache.set("oldhtmllist" + str(newsHtmlList[0].rss.key().id()), oldHtmlList, 3600 * 24 * 3)
             except Exception:
                 # memcache.set can fail (e.g. if the cached list is too large);
                 # the cache is only an optimization, so ignore the error.
                 pass
         #            content=newsHtmlList[0]
         #            news=skinSubOne(content.content)
         skinSubTwo(oldHtmlList, newsHtmlList)
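The commented-out note above describes the first pass of the stripping approach: keep only what sits between <p> and </p> tags. A minimal sketch of that idea follows; the helper name extract_paragraphs and the regular expression are illustrative assumptions, not part of the original project.

    import re

    def extract_paragraphs(html):
        # Naive first-pass extraction: collect the text inside every <p>...</p>
        # pair. As the note above says, this only holds up on the simplified
        # HTML that large RSS publishers tend to emit.
        return re.findall(r'<p[^>]*>(.*?)</p>', html, re.IGNORECASE | re.DOTALL)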
Example #2
 def get(self):
     cid = self.request.get("content")
     if cid:
         content = Contents.get_by_id(int(cid))
         self.render("templates/look.html", {"content": content, "view": True})
         return
     rssid = self.request.get("rss")
     if rssid:
         c = Contents.all().filter("realContentResult =", 0).filter("rss =", RSS.get_by_id(int(rssid))).fetch(1)
     if not rssid or not c:
         c = Contents.all().filter("realContentResult =", 0).fetch(1)
     if c:
         content = c[0]
         self.render("templates/look.html", {"content": content})
     else:
         self.redirect("/")
Example #3
    def parse(self):
        contents = Contents(file=str(self.xcloc_path))

        self.__parse_contents_json(contents)
        self.__parse_xliff(contents)
        self.__parse_screenshots(contents)

        return contents
Example #4
    def write_file_to_database(self, m_group_id, m_group_id2, m_title, m_path,
                               m_url, m_level):
        print 'write_to_database'
        '''doc = Documents.objects.filter(url = m_url)
		if (len(doc) == 0):
			doc = Documents(url=m_url, uid=self.uid, title=m_title, level = m_level)
			doc.save()
			print 'create\n'
		else:
			doc[0].uid = self.uid
			doc[0].title = m_title
			# doc.update()
			doc[0].save()
			doc = doc[0]
			print 'update\n'''
        # doc = Documents(title='title', url='url')
        # doc.save()
        # print 'docid is: %s' % str(doc)
        # Delete old records
        res = ResAddr()
        id = res.saveResaddr(m_url, m_level, '', m_title)
        doc = res.getById(id)
        oldContents = Contents.objects.filter(doc=doc)
        oldContents.delete()
        try:
            print '\n\n\nstart write url:', m_url
            reader = ReadBigFile(m_path, blockSize=1024 * 16)
            block = reader.getNextBlock()

            while block is not None:
                # print 'm_content' + para
                con = Contents(group_id=doc.id,
                               group_id2=m_group_id2,
                               doc=doc,
                               paragraph=unicode(block),
                               tag='p')
                con.save()
                block = reader.getNextBlock()
            os.remove(m_path)
            return doc.id
        except Exception as e:
            # Report the failure and let the caller decide what to do; killing
            # the whole process here would be too drastic.
            print e
            return None
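ReadBigFile itself does not appear in these examples. The call sites only imply a constructor taking a path and a blockSize, plus a getNextBlock() method that returns None once the file is exhausted. The sketch below is reconstructed from that usage and is an assumption, not the project's actual class.

    class ReadBigFile(object):
        """Reads a large file in fixed-size blocks so it never has to fit in memory."""

        def __init__(self, path, blockSize=1024 * 16):
            # Open in binary mode; callers decode blocks themselves (e.g. via unicode()).
            self._fh = open(path, 'rb')
            self._blockSize = blockSize

        def getNextBlock(self):
            # Return the next chunk of the file, or None at end of file.
            block = self._fh.read(self._blockSize)
            if not block:
                self._fh.close()
                return None
            return block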
Example #5
 def get(self):
     rssid = self.request.get("rss")
     c = (
         Contents.all()
         .filter("rss =", RSS.get_by_id(int(rssid)))
         .filter("realContentResult >", 0)
         .filter("realContentResult !=", None)
     )
     self.render("templates/detailLook.html", {"content": c})
Example #6
    def parse(self):
        contents = Contents(file=str(self.file_path))

        wb = openpyxl.load_workbook(self.file_path)

        self.__parse_metadata_sheet(contents, wb)
        self.__parse_translation_sheets(contents, wb)

        return contents
Example #7
 def get(self):
     #        nocode=Contents.all().filter('status =','1').filter('code =',None).fetch(10)
     #        db.delete(nocode)
     nocontent = Contents.all().filter("status =", "1").filter("hasContent =", False).fetch(10)
     db.delete(nocontent)
     deletecontent = Contents.all().filter("status =", "1").filter("hasDelete =", True).fetch(10, 30)
     db.delete(deletecontent)
     #        oldpic=Picture.all().filter('datetime <',datetime.now()+timedelta(hours=-72)).fetch(300)
     #        db.delete(oldpic)
     #        norss=Contents().all().filter('status =','2').fetch(100)
     rss = 0
     #        for c in norss:
     #            try:
     #                r=c.rss.code
     #            except :
     #                logging.info('delete 1')
     #                c.status='1'
     #                c.put()
     #                rss+=1
     logging.info("nocontent:" + str(len(nocontent)) + "-" + "deletecontent:" + str(len(deletecontent)))
Example #8
 def post(self):
     link = self.request.get("link")
     c = Contents.all().filter("link =", link).fetch(1)
     if c:
         content = c[0]
         content.realContentResult = int(self.request.get("realContentResult"))
         content.realContentBz = self.request.get("realContentBz")
         content.put()
         self.redirect("/look?rss=%s" % self.request.get("rss"))
         return
     self.redirect("/")
Example #9
	def write_file_to_database(self,m_group_id, m_group_id2, m_title, m_path, m_url, m_level):
		print 'write_to_database'
		'''doc = Documents.objects.filter(url = m_url)
		if (len(doc) == 0):
			doc = Documents(url=m_url, uid=self.uid, title=m_title, level = m_level)
			doc.save()
			print 'create\n'
		else:
			doc[0].uid = self.uid
			doc[0].title = m_title
			# doc.update()
			doc[0].save()
			doc = doc[0]
			print 'update\n'''
		# doc = Documents(title='title', url='url')
		# doc.save()
		# print 'docid is: %s' % str(doc)
		# Delete old records
		res = ResAddr()
		id = res.saveResaddr(m_url, m_level, '', m_title)
		doc = res.getById(id)
		oldContents = Contents.objects.filter(doc = doc)
		oldContents.delete()
		try:
			print '\n\n\nstart write url:', m_url
			reader = ReadBigFile(m_path, blockSize=1024*16)
			block = reader.getNextBlock()

			while block is not None:
				# print 'm_content' + para
				con = Contents(group_id=doc.id, group_id2=m_group_id2, doc=doc, paragraph=unicode(block), tag='p')
				con.save()
				block = reader.getNextBlock()
			os.remove(m_path)
			return doc.id
		except Exception as e:
			# Report the failure and let the caller decide what to do; killing
			# the whole process here would be too drastic.
			print e
			return None
Example #10
	def write_to_database(self,m_group_id, m_group_id2, m_title, m_content, m_url, m_level):
		print 'write_to_database'
		'''doc = Documents.objects.filter(url = m_url)
		if (len(doc) == 0):
			doc = Documents(url=m_url, uid=self.uid, title=m_title, level = m_level)
			doc.save()
			print 'create\n'
		else:
			doc[0].uid = self.uid
			doc[0].title = m_title
			# doc.update()
			doc[0].save()
			doc = doc[0]
			print 'update\n'''
		# doc = Documents(title='title', url='url')
		# doc.save()
		# print 'docid is: %s' % str(doc)
		# Delete old records
		res = ResAddr()
		id = res.saveResaddr(m_url, m_level, '', m_title)
		doc = res.getById(id)
		oldContents = Contents.objects.filter(doc = doc)
		oldContents.delete()
		if isinstance(m_content, list):
			for para in m_content:
				# print 'm_content' + para
				con = Contents(group_id=doc.id, group_id2=m_group_id2, doc=doc, paragraph=para, tag='p')
				con.save()
		else:
			con = Contents(group_id=doc.id, group_id2=m_group_id2, doc=doc, paragraph=m_content, tag='p')
			con.save()
		return doc.id
Example #11
 def get(self):
     rsslist = []
     for r in RSS.all():
         r.r0 = Contents.all().filter("rss =", r).filter("realContentResult =", 0).count()
         r.r1 = Contents.all().filter("rss =", r).filter("realContentResult =", 1).count()
         r.r2 = Contents.all().filter("rss =", r).filter("realContentResult =", 2).count()
         r.r3 = Contents.all().filter("rss =", r).filter("realContentResult =", 3).count()
         r.r4 = Contents.all().filter("rss =", r).filter("realContentResult =", 4).count()
         r.r5 = Contents.all().filter("rss =", r).filter("realContentResult =", 5).count()
         r.r6 = Contents.all().filter("rss =", r).filter("realContentResult =", None).count()
         rsslist.append(r)
     self.render("templates/analysis.html", {"RSSs": rsslist})
Example #12
    def write_to_database(self, m_group_id, m_group_id2, m_title, m_content,
                          m_url, m_level):
        print 'write_to_database'
        '''doc = Documents.objects.filter(url = m_url)
		if (len(doc) == 0):
			doc = Documents(url=m_url, uid=self.uid, title=m_title, level = m_level)
			doc.save()
			print 'create\n'
		else:
			doc[0].uid = self.uid
			doc[0].title = m_title
			# doc.update()
			doc[0].save()
			doc = doc[0]
			print 'update\n'''
        # doc = Documents(title='title', url='url')
        # doc.save()
        # print 'docid is: %s' % str(doc)
        # Delete old records
        res = ResAddr()
        id = res.saveResaddr(m_url, m_level, '', m_title)
        doc = res.getById(id)
        oldContents = Contents.objects.filter(doc=doc)
        oldContents.delete()
        if isinstance(m_content, list):
            for para in m_content:
                # print 'm_content' + para
                con = Contents(group_id=doc.id,
                               group_id2=m_group_id2,
                               doc=doc,
                               paragraph=para,
                               tag='p')
                con.save()
        else:
            con = Contents(group_id=doc.id,
                           group_id2=m_group_id2,
                           doc=doc,
                           paragraph=m_content,
                           tag='p')
            con.save()
        return doc.id
Example #13
 def get(self, limit):
     self.urls = []
     for content in Contents.all().filter("status =", "1").fetch(int(limit)):
         self.urls.append((content, content.link))
     self.searchRSS()
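Taken together, the App Engine handlers (Examples #1, #2, #5, #7, #8, #11 and #13) imply a db.Model named Contents with at least the properties they filter on. The sketch below is inferred from those call sites; the chosen property types and the RSS stub are assumptions, not the project's actual schema. Examples #3, #4, #6, #9, #10 and #12 use different, unrelated Contents classes that this sketch does not cover.

    from google.appengine.ext import db

    class RSS(db.Model):
        # Stub: the examples only use get_by_id(), key().id() and (in a
        # commented-out block) a code property.
        code = db.StringProperty()

    class Contents(db.Model):
        # Property names match the filters used above; the types are assumptions.
        rss = db.ReferenceProperty(RSS)
        link = db.StringProperty()
        content = db.TextProperty()
        status = db.StringProperty()            # compared against "1" and "2"
        hasContent = db.BooleanProperty()
        hasDelete = db.BooleanProperty()
        realContentResult = db.IntegerProperty()
        realContentBz = db.TextProperty()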