Beispiel #1
0
 def render(item):
     """Recursively write ``item`` and its children to ``self.outfp`` as XML.

     ``self`` is not a parameter here; it is resolved from the enclosing
     scope.  Container items (page, figure, text line, text box) emit an
     opening tag, render every child, then emit the closing tag; the
     other item types emit a single element.
     """
     # NOTE(review): the isinstance checks run top to bottom, so the order
     # matters if any of these layout classes subclass one another --
     # confirm against the class hierarchy before reordering.
     if isinstance(item, LTPage):
         self.outfp.write(
             '<page id="%s" bbox="%s" rotate="%d">\n' %
             (item.pageid, bbox2str(item.bbox), item.rotate))
         for child in item:
             render(child)
         self.outfp.write('</page>\n')
     elif isinstance(item, LTLine):
         self.outfp.write('<line linewidth="%d" bbox="%s" />\n' %
                          (item.linewidth, bbox2str(item.bbox)))
     elif isinstance(item, LTRect):
         self.outfp.write('<rect linewidth="%d" bbox="%s" />\n' %
                          (item.linewidth, bbox2str(item.bbox)))
     elif isinstance(item, LTPolygon):
         self.outfp.write(
             '<polygon linewidth="%d" bbox="%s" pts="%s"/>\n' %
             (item.linewidth, bbox2str(item.bbox), item.get_pts()))
     elif isinstance(item, LTFigure):
         self.outfp.write('<figure name="%s" bbox="%s">\n' %
                          (item.name, bbox2str(item.bbox)))
         for child in item:
             render(child)
         self.outfp.write('</figure>\n')
     elif isinstance(item, LTTextLine):
         self.outfp.write('<textline bbox="%s">\n' %
                          bbox2str(item.bbox))
         for child in item:
             render(child)
         self.outfp.write('</textline>\n')
     elif isinstance(item, LTTextBox):
         # Vertical text boxes get an extra wmode attribute.
         wmode = ''
         if isinstance(item, LTTextBoxVertical):
             wmode = ' wmode="vertical"'
         self.outfp.write('<textbox id="%d" bbox="%s"%s>\n' %
                          (item.index, bbox2str(item.bbox), wmode))
         for child in item:
             render(child)
         self.outfp.write('</textbox>\n')
     elif isinstance(item, LTChar):
         # The character itself goes through self.write_text(), the
         # surrounding tag is written directly.
         self.outfp.write(
             '<text font="%s" bbox="%s" size="%.3f">' %
             (enc(item.fontname), bbox2str(item.bbox), item.size))
         self.write_text(item.text)
         self.outfp.write('</text>\n')
     elif isinstance(item, LTText):
         # item.text is interpolated as-is here (no enc()/write_text()).
         self.outfp.write('<text>%s</text>\n' % item.text)
     elif isinstance(item, LTImage):
         # The image is only exported (and given a src) when an output
         # directory is configured.
         if self.outdir:
             name = self.write_image(item)
             self.outfp.write(
                 '<image src="%s" width="%d" height="%d" />\n' %
                 (enc(name), item.width, item.height))
         else:
             self.outfp.write('<image width="%d" height="%d" />\n' %
                              (item.width, item.height))
     else:
         # Unhandled item type (note: asserts are skipped under -O).
         assert 0, item
     return
Beispiel #2
0
 def render(item):
     """Recursively write ``item`` and its children to ``self.outfp`` as XML.

     ``self`` and ``show_group`` are not defined here; both are resolved
     from the enclosing scope.  Container items emit an opening tag,
     render every child, then emit the closing tag.
     """
     # NOTE(review): the isinstance checks run top to bottom, so the order
     # matters if any of these layout classes subclass one another --
     # confirm against the class hierarchy before reordering.
     if isinstance(item, LTPage):
         self.outfp.write('<page id="%s" bbox="%s" rotate="%d">\n' %
                          (item.pageid, bbox2str(item.bbox), item.rotate))
         for child in item:
             render(child)
         # When the page carries groups, they are written inside a
         # <layout> element after the page's children.
         if item.groups is not None:
             self.outfp.write('<layout>\n')
             for group in item.groups:
                 show_group(group)
             self.outfp.write('</layout>\n')
         self.outfp.write('</page>\n')
     elif isinstance(item, LTLine):
         self.outfp.write('<line linewidth="%d" bbox="%s" />\n' %
                          (item.linewidth, bbox2str(item.bbox)))
     elif isinstance(item, LTRect):
         self.outfp.write('<rect linewidth="%d" bbox="%s" />\n' %
                          (item.linewidth, bbox2str(item.bbox)))
     elif isinstance(item, LTCurve):
         self.outfp.write('<curve linewidth="%d" bbox="%s" pts="%s"/>\n' %
                          (item.linewidth, bbox2str(item.bbox), item.get_pts()))
     elif isinstance(item, LTFigure):
         self.outfp.write('<figure name="%s" bbox="%s">\n' %
                          (item.name, bbox2str(item.bbox)))
         for child in item:
             render(child)
         self.outfp.write('</figure>\n')
     elif isinstance(item, LTTextLine):
         self.outfp.write('<textline bbox="%s">\n' % bbox2str(item.bbox))
         for child in item:
             render(child)
         self.outfp.write('</textline>\n')
     elif isinstance(item, LTTextBox):
         # Vertical text boxes get an extra wmode attribute.
         wmode = ''
         if isinstance(item, LTTextBoxVertical):
             wmode = ' wmode="vertical"'
         self.outfp.write('<textbox id="%d" bbox="%s"%s>\n' %
                          (item.index, bbox2str(item.bbox), wmode))
         for child in item:
             render(child)
         self.outfp.write('</textbox>\n')
     elif isinstance(item, LTChar):
         self.outfp.write('<text font="%s" bbox="%s" size="%.3f">' %
                          (enc(item.fontname), bbox2str(item.bbox), item.size))
         self.write_text(item.get_text())
         self.outfp.write('</text>\n')
     elif isinstance(item, LTText):
         # The text is interpolated as-is here (no enc()/write_text()).
         self.outfp.write('<text>%s</text>\n' % item.get_text())
     elif isinstance(item, LTImage):
         # The image is only exported (and given a src) when an image
         # writer is configured.
         if self.imagewriter is not None:
             name = self.imagewriter.export_image(item)
             self.outfp.write('<image src="%s" width="%d" height="%d" />\n' %
                              (enc(name), item.width, item.height))
         else:
             self.outfp.write('<image width="%d" height="%d" />\n' %
                              (item.width, item.height))
     else:
         # Unhandled item type (note: asserts are skipped under -O).
         assert 0, item
     return
Beispiel #3
0
 def begin_tag(self, tag, props=None):
     """Write the opening tag for ``tag`` and remember it as the open tag.

     Args:
         tag: object whose ``name`` attribute supplies the element name.
         props: optional mapping of attribute names to values.  The
             pairs are emitted in sorted order; a falsy ``props``
             produces a tag without attributes.
     """
     s = ''
     if props:
         # items() exists on both Python 2 and Python 3 mappings, whereas
         # iteritems() was removed in Python 3.
         s = ''.join(' %s="%s"' % (enc(k), enc(str(v)))
                     for (k, v) in sorted(props.items()))
     self.outfp.write('<%s%s>' % (enc(tag.name), s))
     # Stored so the matching close can be written later.
     self.tag = tag
     return
Beispiel #4
0
 def begin_tag(self, tag, props=None):
     """Write the opening tag for ``tag`` and push it on ``self._stack``.

     Args:
         tag: object whose ``name`` attribute supplies the element name.
         props: attributes to emit.  Only a ``dict`` is used; any other
             value (including ``None``) produces a tag without
             attributes.  Pairs are emitted in sorted order.
     """
     s = ''
     if isinstance(props, dict):
         # dict.items() works on Python 2 and 3; iteritems() is
         # Python 2 only.
         s = ''.join(' %s="%s"' % (enc(k), enc(str(v)))
                     for (k, v) in sorted(props.items()))
     self.outfp.write('<%s%s>' % (enc(tag.name), s))
     self._stack.append(tag)
     return
Beispiel #5
0
 def begin_tag(self, tag, props=None):
     """Emit ``<name attr="value" ...>`` for ``tag`` and record it in ``self.tag``.

     Args:
         tag: object exposing the element name as ``tag.name``.
         props: optional attribute mapping; skipped entirely when falsy.
             Attribute pairs are written in sorted order.
     """
     s = ''
     if props:
         # Use items(): iteritems() does not exist on Python 3 mappings,
         # while items() is available on both major versions.
         s = ''.join(' %s="%s"' % (enc(k), enc(str(v)))
                     for (k, v) in sorted(props.items()))
     self.outfp.write('<%s%s>' % (enc(tag.name), s))
     self.tag = tag
     return
Beispiel #6
0
 def begin_tag(self, tag, props=None):
     """Emit the opening tag for ``tag`` and push ``tag`` onto ``self._stack``.

     Args:
         tag: object exposing the element name as ``tag.name``.
         props: attribute dict; ignored unless it is a ``dict`` instance.
             Attribute pairs are written in sorted order.
     """
     s = ''
     if isinstance(props, dict):
         # items() instead of the Python-2-only iteritems(), so the
         # method also runs on Python 3.
         s = ''.join(' %s="%s"' % (enc(k), enc(str(v)))
                     for (k, v) in sorted(props.items()))
     self.outfp.write('<%s%s>' % (enc(tag.name), s))
     self._stack.append(tag)
     return
Beispiel #7
0
 def render(item):
     """Recursively write ``item`` and its children to ``self.outfp`` as XML.

     ``self`` is not a parameter here; it is resolved from the enclosing
     scope.  Container items emit an opening tag, render every child,
     then emit the closing tag.
     """
     # NOTE(review): the isinstance checks run top to bottom, so the order
     # matters if any of these layout classes subclass one another --
     # confirm against the class hierarchy before reordering.
     if isinstance(item, LTPage):
         self.outfp.write('<page id="%s" bbox="%s" rotate="%d">\n' %
                          (item.pageid, bbox2str(item.bbox), item.rotate))
         for child in item:
             render(child)
         self.outfp.write('</page>\n')
     elif isinstance(item, LTLine):
         self.outfp.write('<line linewidth="%d" bbox="%s" />\n' %
                          (item.linewidth, bbox2str(item.bbox)))
     elif isinstance(item, LTRect):
         self.outfp.write('<rect linewidth="%d" bbox="%s" />\n' %
                          (item.linewidth, bbox2str(item.bbox)))
     elif isinstance(item, LTPolygon):
         self.outfp.write('<polygon linewidth="%d" bbox="%s" pts="%s"/>\n' %
                          (item.linewidth, bbox2str(item.bbox), item.get_pts()))
     elif isinstance(item, LTFigure):
         self.outfp.write('<figure name="%s" bbox="%s">\n' %
                          (item.name, bbox2str(item.bbox)))
         for child in item:
             render(child)
         self.outfp.write('</figure>\n')
     elif isinstance(item, LTTextLine):
         self.outfp.write('<textline bbox="%s">\n' % bbox2str(item.bbox))
         for child in item:
             render(child)
         self.outfp.write('</textline>\n')
     elif isinstance(item, LTTextBox):
         self.outfp.write('<textbox id="%d" bbox="%s">\n' % (item.index, bbox2str(item.bbox)))
         for child in item:
             render(child)
         self.outfp.write('</textbox>\n')
     elif isinstance(item, LTChar):
         # The vertical attribute (with its trailing space) is only
         # inserted for characters that report is_vertical().
         vertical = ''
         if item.is_vertical():
             vertical = 'vertical="true" '
         self.outfp.write('<text font="%s" %sbbox="%s" size="%.3f">' %
                          (enc(item.font.fontname), vertical,
                           bbox2str(item.bbox), item.get_size()))
         self.write(item.text)
         self.outfp.write('</text>\n')
     elif isinstance(item, LTText):
         # item.text is interpolated as-is here (no enc()/self.write()).
         self.outfp.write('<text>%s</text>\n' % item.text)
     elif isinstance(item, LTImage):
         # The image is only exported (and given a src) when an output
         # directory is configured.
         if self.outdir:
             name = self.write_image(item)
             self.outfp.write('<image src="%s" width="%d" height="%d" />\n' %
                              (enc(name), item.width, item.height))
         else:
             self.outfp.write('<image width="%d" height="%d" />\n' %
                              (item.width, item.height))
     else:
         # Unhandled item type (note: asserts are skipped under -O).
         assert 0, item
     return
Beispiel #8
0
 def gen_url(self):
     """Endlessly yield video page URLs for videos that have no tags.

     Each round queries up to 100 documents of ``db.video`` whose ``tag``
     is an empty list or missing, yields one URL per document, then
     sleeps 10 seconds.  Errors raised during a round are logged and the
     loop carries on.
     """
     while True:
         try:
             untagged = {'$or': [{'tag': []}, {'tag': {'$exists': False}}]}
             wanted = {'aid': 1, 'bvid': 1}
             for doc in db.video.find(untagged, wanted).limit(100):
                 # Fall back to deriving the id from ``aid`` when the
                 # document has no ``bvid`` of its own.
                 bvid = doc['bvid'] if 'bvid' in doc else enc(doc['aid'])
                 yield 'https://www.bilibili.com/video/BV{}'.format(bvid)
         except Exception as e:
             logging.exception(e)
         sleep(10)
Beispiel #9
0
 async def gen_url(self):
     """Endlessly yield tag-API URLs for videos stored after the last tagged one.

     Each round looks up the newest ``video_info`` document (by ``_id``)
     that has a ``tag`` field and yields one URL for each of up to 30
     documents with a greater ``_id``.  When no document has a ``tag``
     yet, the scan starts from the beginning of the collection instead
     of failing.  The generator sleeps one second when a round produced
     nothing or raised, so it never spins without yielding control.
     """
     projection = {'aid': 1, 'bvid': 1}
     while True:
         try:
             last_tag = await self.async_db.video_info.find_one(
                 {'tag': {'$exists': True}}, projection,
                 sort=[('_id', -1)])
             # find_one() returns None when nothing matches; subscripting
             # it used to raise on every round and stall the generator.
             if last_tag is None:
                 query = {}
             else:
                 query = {'_id': {'$gt': last_tag['_id']}}
             videos = self.async_db.video_info.find(
                 query, projection).limit(30)
             found = False
             async for each_video in videos:
                 found = True
                 if 'bvid' in each_video:
                     bvid = each_video['bvid']
                 else:
                     bvid = enc(each_video['aid'])
                 yield 'https://api.bilibili.com/x/tag/archive/tags?bvid={}'.format(
                     bvid)
             if not found:
                 await asyncio.sleep(1)
         except Exception as e:
             logging.exception(e)
             # Back off so a persistent error cannot turn into a tight
             # loop that floods the log and starves the event loop.
             await asyncio.sleep(1)
 def update_video_interval(self, interval: int, aid, bvid):
     """Build a ``video_interval`` record that is due immediately.

     Args:
         interval: value stored under ``'interval'``.
         aid: video aid, or ``None`` to derive it from ``bvid``.
         bvid: video bvid, or ``None`` to derive it from ``aid``.

     Returns:
         A dict with the keys ``'next'``, ``'interval'``, ``'aid'`` and
         ``'bvid'``; ``'next'`` is the current UTC time plus 8 hours.
     """
     now = datetime.utcnow() + timedelta(hours=8)
     # Identity comparison is the correct test for None: ``==`` can be
     # overridden by the operand's __eq__.
     # NOTE(review): the other call sites in this code base use
     # enc(aid) -> bvid and dec(bvid) -> aid; the two calls below go the
     # opposite way -- confirm against the helpers' definitions.
     if aid is None:
         aid = enc(bvid)
     if bvid is None:
         bvid = dec(aid)
     return {'next': now, 'interval': interval, 'aid': aid, 'bvid': bvid}
Beispiel #11
0
 def render(item):
     """Recursively write ``item`` and its children to ``self.outfp`` as XML.

     ``self`` is not a parameter here; it is resolved from the enclosing
     scope.  Container items emit an opening tag, render every child,
     then emit the closing tag.
     """
     # NOTE(review): the isinstance checks run top to bottom, so the order
     # matters if any of these layout classes subclass one another --
     # confirm against the class hierarchy before reordering.
     if isinstance(item, LTPage):
         self.outfp.write('<page id="%s" bbox="%s" rotate="%d">\n' %
                          (item.id, strbbox(item.bbox), item.rotate))
         for child in item:
             render(child)
         self.outfp.write('</page>\n')
     # An LTLine whose ``direction`` is falsy does not match here and is
     # tested against the remaining branches instead.
     elif isinstance(item, LTLine) and item.direction:
         self.outfp.write('<line linewidth="%d" direction="%s" bbox="%s" />\n' % (item.linewidth, item.direction, strbbox(item.bbox)))
     elif isinstance(item, LTRect):
         self.outfp.write('<rect linewidth="%d" bbox="%s" />\n' % (item.linewidth, strbbox(item.bbox)))
     elif isinstance(item, LTPolygon):
         self.outfp.write('<polygon linewidth="%d" bbox="%s" pts="%s"/>\n' % (item.linewidth, strbbox(item.bbox), item.get_pts()))
     elif isinstance(item, LTFigure):
         self.outfp.write('<figure id="%s" bbox="%s">\n' % (item.id, strbbox(item.bbox)))
         for child in item:
             render(child)
         self.outfp.write('</figure>\n')
     elif isinstance(item, LTTextLine):
         self.outfp.write('<textline bbox="%s">\n' % strbbox(item.bbox))
         for child in item:
             render(child)
         self.outfp.write('</textline>\n')
     elif isinstance(item, LTTextBox):
         self.outfp.write('<textbox id="%s" bbox="%s">\n' % (item.id, strbbox(item.bbox)))
         for child in item:
             render(child)
         self.outfp.write('</textbox>\n')
     elif isinstance(item, LTTextItem):
         # The result of is_vertical() is formatted with %s into the
         # vertical attribute.
         self.outfp.write('<text font="%s" vertical="%s" bbox="%s" fontsize="%.3f">' %
                          (enc(item.font.fontname), item.is_vertical(),
                           strbbox(item.bbox), item.fontsize))
         self.write(item.text)
         self.outfp.write('</text>\n')
     elif isinstance(item, LTText):
         # item.text is interpolated as-is here (no enc()/self.write()).
         self.outfp.write('<text>%s</text>\n' % item.text)
     elif isinstance(item, LTImage):
         # ``x`` holds the optional name attribute; it stays empty when
         # no output directory is set or write_image() returns a falsy
         # name.
         x = ''
         if self.outdir:
             name = self.write_image(item)
             if name:
                 x = 'name="%s" ' % enc(name)
         self.outfp.write('<image %stype="%s" width="%d" height="%d" />\n' % (x, item.type, item.width, item.height))
     else:
         # Unhandled item type (note: asserts are skipped under -O).
         assert 0, item
     return
Beispiel #12
0
    def video_gen(self):
        """Endlessly yield ``video_interval`` documents that are due for a crawl.

        Each round is guarded by a lock document in ``self.db.lock``.
        While holding the lock the method collects up to 100 documents
        with a non-empty ``order`` list and up to 100 whose ``next`` is
        in the past, marks their orders as executed, advances ``next``
        by ``interval`` seconds, fills in a missing ``aid``/``bvid`` and
        writes the document back.  The lock is always released before
        the documents are yielded -- also when the round fails -- so a
        single error can no longer block every later round.  Errors are
        logged through ``self.logger`` and the loop continues.
        """
        lock_filter = {"name": "video_interval"}
        while True:
            # Somebody else holds the lock: wait and look again.
            if self.db.lock.count_documents(lock_filter):
                sleep(0.1)
                continue
            # Take the lock.  insert_one() replaces the deprecated
            # insert(); it is available wherever count_documents() is.
            self.db.lock.insert_one({
                "name": "video_interval",
                "date": datetime.datetime.utcnow()
            })
            try:
                try:
                    d = list(self.db.video_interval.find({
                        'order': {
                            '$exists': True,
                            '$ne': []
                        }
                    }).hint("idx_order").limit(100))
                    d.extend(self.db.video_interval.find({
                        'next': {
                            '$lt': datetime.datetime.utcnow()
                        }
                    }).limit(100))
                    for data in d:
                        # Manual orders attached to the record are
                        # marked as executed.
                        if 'order' in data:
                            for order_id in data['order']:
                                self.db.user_record.update_one(
                                    {'_id': order_id},
                                    {'$set': {
                                        'isExecuted': True
                                    }})
                        data['next'] = data['next'] + \
                            datetime.timedelta(seconds=data['interval'])
                        data['order'] = []
                        if 'bvid' not in data:
                            bvid = enc(data['aid'])
                            # Remove only a literal "BV" prefix;
                            # str.lstrip("BV") would strip every leading
                            # 'B' or 'V' character of the id itself.
                            if bvid.startswith("BV"):
                                bvid = bvid[2:]
                            data['bvid'] = bvid
                        if 'aid' not in data:
                            aid = dec(data['bvid'])
                            data['aid'] = aid
                        self.db.video_interval.update_one(
                            {
                                'aid': data['aid'],
                                'bvid': data['bvid']
                            }, {'$set': data})
                finally:
                    # Release the lock whether or not the round worked.
                    self.db.lock.delete_one(lock_filter)
                for data in d:
                    yield data
            except Exception as e:
                self.logger.exception(e)
Beispiel #13
0
 def place_image(self, item, borderwidth, x, y, w, h):
     """Export ``item`` and write an absolutely positioned ``<img>`` tag for it.

     Nothing is written when ``self.imagewriter`` is ``None``.  The
     position and size are multiplied by ``self.scale``; the top offset
     is measured from ``self._yoffset``.
     """
     if self.imagewriter is None:
         return
     name = self.imagewriter.export_image(item)
     values = (enc(name), borderwidth,
               x*self.scale, (self._yoffset-y)*self.scale,
               w*self.scale, h*self.scale)
     markup = ('<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" '
               'width="%d" height="%d" />\n')
     self.write(markup % values)
     return
Beispiel #14
0
 def update_video_interval(self, interval: int, aid, bvid):
   """Build a ``video_interval`` record whose ``next`` is the Unix epoch.

   Args:
     interval: value stored under ``'interval'``.
     aid: video aid, or ``None`` to derive it from ``bvid``.
     bvid: video bvid, or ``None`` to derive it from ``aid``.

   Returns:
     A dict with the keys ``'next'``, ``'interval'``, ``'aid'`` and
     ``'bvid'``.
   """
   # Identity comparison is the correct test for None: ``==`` can be
   # overridden by the operand's __eq__.
   # NOTE(review): the other call sites in this code base use
   # enc(aid) -> bvid and dec(bvid) -> aid; the two calls below go the
   # opposite way -- confirm against the helpers' definitions.
   if aid is None:
     aid = enc(bvid)
   if bvid is None:
     bvid = dec(aid)
   return {
       # Same naive value as datetime.utcfromtimestamp(0), without the
       # call that is deprecated since Python 3.12.
       'next': datetime(1970, 1, 1),
       'interval': interval,
       'aid': aid,
       'bvid': bvid
   }
 def render_string(self, textstate, seq):
     """Decode the ``str`` entries of ``seq`` with the current font and return them.

     Every ``str`` in ``seq`` is decoded into character ids by
     ``textstate.font``; each id is mapped with ``to_unichr`` and ids
     that raise ``PDFUnicodeNotDefined`` are skipped.  Entries that are
     not ``str`` are ignored.  The collected text is returned after
     being passed through ``enc`` with ``self.codec``.
     """
     font = textstate.font
     pieces = []
     for obj in seq:
         if isinstance(obj, str):
             for cid in font.decode(obj):
                 try:
                     pieces.append(font.to_unichr(cid))
                 except PDFUnicodeNotDefined:
                     continue
     return enc(''.join(pieces), self.codec)
Beispiel #16
0
 def render_string(self, textstate, seq):
     """Decode the ``str`` entries of ``seq`` and write the text to ``self.outfp``.

     Every ``str`` in ``seq`` is decoded into character ids by
     ``textstate.font``; ids for which ``to_unichr`` raises
     ``PDFUnicodeNotDefined`` are dropped, and entries that are not
     ``str`` are ignored.  The result is passed through ``enc`` with
     ``self.codec`` before it is written.
     """
     font = textstate.font
     collected = []
     for chunk in seq:
         if not isinstance(chunk, str):
             continue
         for cid in font.decode(chunk):
             try:
                 collected.append(font.to_unichr(cid))
             except PDFUnicodeNotDefined:
                 pass
     self.outfp.write(enc(''.join(collected), self.codec))
     return
Beispiel #17
0
 def write_image(self, image):
     """Save ``image`` under ``self.outdir`` and write an ``<img>`` tag for it.

     Only images whose ``type`` is in ``LITERALS_DCT_DECODE`` are
     handled (saved with a ``.jpg`` extension); for any other type the
     method returns without writing anything.  The tag is positioned
     from the image's ``x0``/``y1`` relative to ``self.yoffset`` and
     scaled by ``self.scale``.
     """
     if image.type in LITERALS_DCT_DECODE:
         ext = '.jpg'
     else:
         return
     name = image.name+ext
     path = os.path.join(self.outdir, name)
     # open() exists on Python 2 and 3 (file() is Python 2 only), and
     # the with-block closes the handle even when the write fails.
     with open(path, 'wb') as fp:
         fp.write(image.data)
     self.outfp.write('<img src="%s" style="position:absolute; left:%dpx; top:%dpx;" '
                      'width="%d" height="%d" />\n' %
                      (enc(name),
                       image.x0*self.scale, (self.yoffset-image.y1)*self.scale,
                       image.width*self.scale, image.height*self.scale))
     return
Beispiel #18
0
 def render(item):
     """Recursively draw ``item`` and its children as positioned HTML.

     ``self`` is not a parameter here; it is resolved from the enclosing
     scope.  Boxes are drawn through ``self.write_rect`` and characters
     through ``self.write_text``.  Item types not listed below are
     silently ignored.
     """
     # NOTE(review): the isinstance checks run top to bottom, so the order
     # matters if any of these layout classes subclass one another --
     # confirm against the class hierarchy before reordering.
     if isinstance(item, LTPage):
         # Each page pushes the running vertical offset down by its y1.
         self.yoffset += item.y1
         self.write_rect('gray', 1, item.x0, item.y1, item.width,
                         item.height)
         if self.showpageno:
             self.outfp.write(
                 '<div style="position:absolute; top:%dpx;">' %
                 ((self.yoffset - item.y1) * self.scale))
             # NOTE(review): ``page`` is not defined inside this
             # function; presumably it comes from the enclosing scope
             # and is the same object as ``item`` here -- confirm.
             self.outfp.write('<a name="%s">Page %s</a></div>\n' %
                              (page.pageid, page.pageid))
         for child in item:
             render(child)
     elif isinstance(item, LTChar):
         self.write_text(item.text, item.x0, item.y1, item.get_size())
         # Character outlines are only drawn in debug mode.
         if self.debug:
             self.write_rect('red', 1, item.x0, item.y1, item.width,
                             item.height)
     elif isinstance(item, LTPolygon):
         self.write_rect('black', 1, item.x0, item.y1, item.width,
                         item.height)
     elif isinstance(item, LTTextLine):
         for child in item:
             render(child)
     elif isinstance(item, LTTextBox):
         self.write_rect('blue', 1, item.x0, item.y1, item.width,
                         item.height)
         for child in item:
             render(child)
         # In debug mode the 1-based box index is written at the box
         # position.
         if self.debug:
             self.write_text(str(item.index + 1), item.x0, item.y1, 20)
     elif isinstance(item, LTFigure):
         self.write_rect('green', 1, item.x0, item.y1, item.width,
                         item.height)
         for child in item:
             render(child)
     elif isinstance(item, LTImage):
         # Images are only exported and referenced when an output
         # directory is configured.
         if self.outdir:
             name = self.write_image(item)
             self.outfp.write(
                 '<img src="%s" style="position:absolute; left:%dpx; top:%dpx;" '
                 'width="%d" height="%d" />\n' %
                 (enc(name), item.x0 * self.scale,
                  (self.yoffset - item.y1) * self.scale,
                  item.width * self.scale, item.height * self.scale))
     return
Beispiel #19
0
 def render(item):
     """Recursively draw ``item`` and its children as positioned HTML.

     ``self`` is not a parameter here; it is resolved from the enclosing
     scope.  Boxes are drawn through ``self.write_rect`` and characters
     through ``self.write_text``.  Item types not listed below are
     silently ignored.
     """
     # NOTE(review): the isinstance checks run top to bottom, so the order
     # matters if any of these layout classes subclass one another --
     # confirm against the class hierarchy before reordering.
     if isinstance(item, LTPage):
         # Each page pushes the running vertical offset down by its y1.
         self.yoffset += item.y1
         self.write_rect('gray', 1, item.x0, item.y1, item.width, item.height)
         if self.showpageno:
             self.outfp.write('<div style="position:absolute; top:%dpx;">' %
                              ((self.yoffset-item.y1)*self.scale))
             self.outfp.write('<a name="%s">Page %s</a></div>\n' % (item.pageid, item.pageid))
         for child in item:
             render(child)
     elif isinstance(item, LTChar):
         self.write_text(item.text, item.x0, item.y1, item.get_size())
         # Character outlines are only drawn in debug mode.
         if self.debug:
             self.write_rect('green', 1, item.x0, item.y1, item.width, item.height)
     elif isinstance(item, LTPolygon):
         self.write_rect('black', 1, item.x0, item.y1, item.width, item.height)
     elif isinstance(item, LTTextLine):
         self.write_rect('magenta', 1, item.x0, item.y1, item.width, item.height)
         for child in item:
             render(child)
     elif isinstance(item, LTTextBox):
         self.write_rect('cyan', 1, item.x0, item.y1, item.width, item.height)
         for child in item:
             render(child)
         # In debug mode the 1-based box index is written at the box
         # position.
         if self.debug:
             self.write_text(str(item.index+1), item.x0, item.y1, 20)
     elif isinstance(item, LTFigure):
         self.write_rect('yellow', 1, item.x0, item.y1, item.width, item.height)
         for child in item:
             render(child)
     elif isinstance(item, LTImage):
         # Images are only exported and referenced when an output
         # directory is configured.
         if self.outdir:
             name = self.write_image(item)
             self.outfp.write('<img src="%s" style="position:absolute; left:%dpx; top:%dpx;" '
                              'width="%d" height="%d" />\n' %
                              (enc(name),
                               item.x0*self.scale, (self.yoffset-item.y1)*self.scale,
                               item.width*self.scale, item.height*self.scale))
     return
Beispiel #20
0
 def video_gen_without_lock(self):
     """Endlessly yield ``video_interval`` documents whose ``next`` is in the past.

     Each round loads up to 100 due documents, marks the orders attached
     to them as executed, advances ``next`` by ``interval`` seconds,
     clears ``order``, fills in a missing ``aid``/``bvid`` and writes the
     document back before yielding the whole batch.  Errors are logged
     through ``self.logger`` and the loop continues.
     """
     while True:
         try:
             due = list(self.db.video_interval.find({
                 'next': {'$lt': datetime.datetime.utcnow()}
             }).hint("idx_next").limit(100))
             for doc in due:
                 # Manual orders attached to the record are marked as
                 # executed.
                 if 'order' in doc:
                     for order_id in doc['order']:
                         self.db.user_record.update_one(
                             {'_id': order_id},
                             {'$set': {'isExecuted': True}})
                 step = datetime.timedelta(seconds=doc['interval'])
                 doc['next'] = doc['next'] + step
                 doc['order'] = []
                 if 'bvid' not in doc:
                     doc['bvid'] = enc(doc['aid']).lstrip("BV")
                 if 'aid' not in doc:
                     doc['aid'] = dec(doc['bvid'])
                 key = {'aid': doc['aid'], 'bvid': doc['bvid']}
                 self.db.video_interval.update_one(key, {'$set': doc})
             for doc in due:
                 yield doc
         except Exception as e:
             self.logger.exception(e)
Beispiel #21
0
    async def video_gen_without_lock(self):
        """Endlessly yield due ``video_interval`` documents, oldest ``next`` first.

        Each round loads up to ``batch`` documents whose ``next`` is not
        in the future.  A document missing one of ``aid``/``bvid`` gets
        it derived from the other; a document with neither, or with an
        ``aid`` that is ``None``, a string or not positive, is deleted
        from the collection.  Documents that end up with an ``aid`` are
        yielded once per round (de-duplicated on ``aid``).  When a round
        returns fewer than half a batch the generator sleeps 10 seconds.
        Errors are logged through ``self.logger`` and the loop continues.
        """
        last_data = set()
        batch = 2000
        while True:
            try:
                d = []
                data = self.async_db.video_interval.find({
                    'next': {
                        '$lte': datetime.datetime.utcnow()
                    }
                }).sort([('next', 1)]).hint("idx_next").limit(batch)
                async for each in data:
                    has_aid = 'aid' in each
                    # Test the stored value: the previous code compared
                    # the literal 'bvid' with '', which is constant, so
                    # empty bvids were never recognised.
                    has_bvid = 'bvid' in each and each['bvid'] != ''
                    if not has_aid and has_bvid:
                        bvid = each['bvid']
                        # Add the "BV" prefix only when it is missing;
                        # lstrip('BV') would also strip leading 'B'/'V'
                        # characters that belong to the id.
                        if not bvid.startswith('BV'):
                            bvid = 'BV' + bvid
                        each['aid'] = dec(bvid)
                    elif has_aid and not has_bvid:
                        bvid = enc(each['aid'])
                        if bvid.startswith('BV'):
                            bvid = bvid[2:]
                        each['bvid'] = bvid
                    elif (has_aid and has_bvid
                          and each['aid'] is not None
                          and not isinstance(each['aid'], str)
                          and each['aid'] > 0):
                        pass
                    else:
                        await self.async_db.video_interval.delete_one(
                            {'_id': each['_id']})
                    # NOTE(review): deleted documents are still appended
                    # (and yielded below if they carry an aid), as before
                    # -- confirm whether they should be skipped.
                    d.append(each)
                for data in d:
                    if 'aid' not in data:
                        continue
                    if data['aid'] not in last_data:
                        last_data.add(data['aid'])
                        yield data
                last_data = set()

                if len(d) < batch / 2:
                    await asyncio.sleep(10)
            except Exception as e:
                self.logger.exception(e)
Beispiel #22
0
 def write_text(self, text):
     """Pass ``text`` through ``enc`` with ``self.codec`` and write it to ``self.outfp``."""
     emit = self.outfp.write
     emit(enc(text, self.codec))
     return
Beispiel #23
0
 def write_text(self, text):
     """Write ``text`` to ``self.outfp`` after ``enc(text, self.codec)``."""
     sink = self.outfp
     sink.write(enc(text, self.codec))
     return
Beispiel #24
0
    async def video_gen(self):
        """Endlessly yield ``video_interval`` documents that are due for a crawl.

        Each round is guarded by a lock document in
        ``self.async_db.lock``.  While holding the lock the method
        collects up to 100 documents with a non-empty ``order`` list and
        up to 100 whose ``next`` is in the past, marks their orders as
        executed, advances ``next`` by ``interval`` seconds, fills in a
        missing ``aid``/``bvid`` and writes the document back.  The lock
        is always released before the documents are yielded -- also when
        the round fails.  Errors are logged through ``self.logger`` and
        the loop continues.
        """
        # Local import keeps this block self-contained.
        import asyncio

        lock_filter = {"name": "video_interval"}
        while True:
            # Somebody else holds the lock: wait without blocking the
            # event loop (a plain sleep() would stall every coroutine).
            if await self.async_db.lock.count_documents(lock_filter):
                await asyncio.sleep(0.1)
                continue
            # Take the lock.
            await self.async_db.lock.insert_one({
                "name":
                "video_interval",
                "date":
                datetime.datetime.utcnow()
            })
            try:
                try:
                    d = []
                    # Cursors are iterated with ``async for``; they are
                    # not awaitable themselves.
                    cursor = self.async_db.video_interval.find({
                        'order': {
                            '$exists': True,
                            '$ne': []
                        }
                    }).hint("idx_order").limit(100)
                    async for each in cursor:
                        d.append(each)
                    cursor = self.async_db.video_interval.find({
                        'next': {
                            '$lt': datetime.datetime.utcnow()
                        }
                    }).limit(100)
                    async for each in cursor:
                        d.append(each)
                    for data in d:
                        # Manual orders attached to the record are
                        # marked as executed.
                        if 'order' in data:
                            for order_id in data['order']:
                                await self.async_db.user_record.update_one(
                                    {'_id': order_id},
                                    {'$set': {
                                        'isExecuted': True
                                    }})
                        data['next'] = data['next'] + \
                            datetime.timedelta(seconds=data['interval'])
                        data['order'] = []
                        try:
                            if 'aid' not in data:
                                data['aid'] = dec('BV' + data['bvid'])
                                filt = {'bvid': data['bvid']}
                            elif 'bvid' not in data:
                                bvid = enc(data['aid'])
                                # Remove only a literal "BV" prefix;
                                # lstrip("BV") would strip every leading
                                # 'B' or 'V' character of the id itself.
                                if bvid.startswith("BV"):
                                    bvid = bvid[2:]
                                data['bvid'] = bvid
                                filt = {'aid': data['aid']}
                            else:
                                filt = {'bvid': data['bvid']}
                        except Exception:
                            # The id could not be converted: drop the
                            # stored record(s) and move on.
                            if 'aid' in data:
                                await self.async_db.video_interval.delete_many(
                                    {'aid': data['aid']})
                            else:
                                await self.async_db.video_interval.delete_many(
                                    {'bvid': data['bvid']})
                            continue
                        # count_documents() replaces the deprecated
                        # count(); it is already used for the lock above.
                        if await self.async_db.video_interval.count_documents(
                                filt) > 1:
                            # NOTE(review): after the duplicates are
                            # deleted the update below has nothing left
                            # to match (no upsert) -- confirm the intent.
                            await self.async_db.video_interval.delete_many(filt)

                        await self.async_db.video_interval.update_one(
                            filt, {'$set': data})
                finally:
                    # Release the lock whether or not the round worked;
                    # otherwise one error would block every later round.
                    await self.async_db.lock.delete_one(lock_filter)
                for data in d:
                    yield data
            except Exception as e:
                self.logger.exception(e)
Beispiel #25
0
 def end_tag(self):
     """Pop the most recently opened tag off ``self._stack`` and write its closing tag."""
     open_tags = self._stack
     assert open_tags
     tag = open_tags.pop()
     self.outfp.write('</%s>' % enc(tag.name))
     return
Beispiel #26
0
 def end_tag(self):
     """Write the closing tag for the innermost open tag and drop it from ``self._stack``."""
     assert self._stack
     innermost = self._stack.pop()
     emit = self.outfp.write
     emit('</%s>' % enc(innermost.name))
     return
Beispiel #27
0
 def end_tag(self):
     """Write the closing tag for ``self.tag`` and reset ``self.tag`` to ``None``."""
     assert self.tag
     emit = self.outfp.write
     emit('</%s>' % enc(self.tag.name))
     self.tag = None
     return
Beispiel #28
0
from db import db
from utils import dec, enc
# Backfill: give every video document that has no ``bvid`` one derived
# from its ``aid``, and print each pair as it is written.
for each_video in db.video.find({'bvid': {'$exists': False}}, {'aid': 1, 'bvid': 1}).batch_size(20):
  bvid = enc(each_video['aid'])
  # Remove only a literal "BV" prefix; str.lstrip('BV') strips every
  # leading 'B' or 'V' character, including ones that belong to the id.
  if bvid.startswith('BV'):
    bvid = bvid[2:]
  db.video.update_one({'aid': each_video['aid']}, {'$set': {
      'bvid': bvid
  }})
  print(each_video['aid'], bvid)
Beispiel #29
0
 def end_tag(self):
     """Close the tag stored in ``self.tag`` on ``self.outfp`` and clear it."""
     assert self.tag
     sink = self.outfp
     sink.write('</%s>' % enc(self.tag.name))
     self.tag = None
     return
Beispiel #30
0
 def write_text(self, text):
     """Write ``text`` to ``self.outfp`` after ``enc(text, self.codec)``.

     When ``self.stripcontrol`` is set, everything matched by
     ``self.CONTROL`` is removed from ``text`` first.
     """
     cleaned = self.CONTROL.sub(u'', text) if self.stripcontrol else text
     self.outfp.write(enc(cleaned, self.codec))
     return
Beispiel #31
0
 def render(item):
     """Recursively write ``item`` and its children to ``self.outfp`` as XML.

     ``self`` and ``show_group`` are not defined here; both are resolved
     from the enclosing scope.  The output is shaped by two settings
     read from ``self``: ``simplerOutput`` drops the <layout> and
     <textbox> wrappers and renames <textline> to <text>, and
     ``layoutmode == 'exact'`` wraps each character in its own <text>
     element instead of writing only the bare text.
     """
     # NOTE(review): the isinstance checks run top to bottom, so the order
     # matters if any of these layout classes subclass one another --
     # confirm against the class hierarchy before reordering.
     if isinstance(item, LTPage):
         # Get max Y coord
         self._yoffset = item.y1
         self.outfp.write('<page number="%s" id="%s" %s rotate="%d">\n' %
                          (item.pageid, item.pageid, self.scaled_bbox(item), item.rotate))
         for child in item:
             render(child)
         if item.groups is not None:
             # Groups are shown either bare or inside a <layout> element.
             if self.simplerOutput:
                 for group in item.groups:
                     show_group(group)
             else:
                 self.outfp.write('<layout>\n')
                 for group in item.groups:
                     show_group(group)
                 self.outfp.write('</layout>\n')
         self.outfp.write('</page>\n')
     elif isinstance(item, LTLine):
         self.outfp.write('<line linewidth="%d" %s />\n' %
                          (item.linewidth, self.scaled_bbox(item)))
     elif isinstance(item, LTRect):
         self.outfp.write('<rect linewidth="%d" %s />\n' %
                          (item.linewidth, self.scaled_bbox(item)))
     elif isinstance(item, LTCurve):
         self.outfp.write('<curve linewidth="%d" %s pts="%s" />\n' %
                          (item.linewidth, self.scaled_bbox(item), item.get_pts()))
     elif isinstance(item, LTFigure):
         self.outfp.write('<figure name="%s" %s>\n' %
                          (item.name, self.scaled_bbox(item)))
         for child in item:
             render(child)
         self.outfp.write('</figure>\n')
     elif isinstance(item, LTTextLine):
         # The element is called <text> in simpler output, <textline>
         # otherwise.
         tagName = 'textline'
         if self.simplerOutput:
             tagName = 'text'
         self.outfp.write('<%s %s>\n' % (tagName, self.scaled_bbox(item)))
         for child in item:
             render(child)
         self.outfp.write('</%s>\n' % tagName)
     elif isinstance(item, LTTextBox):
         if self.simplerOutput:
             # No <textbox> wrapper: only the children are rendered.
             for child in item:
                 render(child)
         else:
             wmode = ''
             if isinstance(item, LTTextBoxVertical):
                 wmode = ' wmode="vertical"'
             self.outfp.write('<textbox id="%d" %s %s>\n' %
                              (item.index, self.scaled_bbox(item), wmode))
             for child in item:
                 render(child)
             self.outfp.write('</textbox>\n')
     elif isinstance(item, LTChar):
         if self.layoutmode == 'exact':
             self.outfp.write('<text font="%s" %s size="%.3f">' %
                              (enc(item.fontname), self.scaled_bbox(item), item.size))
             self.write_text(item.get_text())
             self.outfp.write('</text>\n')
         else:
             self.write_text(item.get_text())
     elif isinstance(item, LTText):
         if self.layoutmode == 'exact':
             # The text is interpolated as-is here (no enc()/write_text()).
             self.outfp.write('<text>%s</text>\n' % item.get_text())
         else:
             self.write_text(item.get_text())
     elif isinstance(item, LTImage):
         # The image is only exported (and given a src) when an image
         # writer is configured.
         if self.imagewriter is not None:
             name = self.imagewriter.export_image(item)
             self.outfp.write('<image src="%s" width="%d" height="%d" />\n' %
                              (enc(name), item.width, item.height))
         else:
             self.outfp.write('<image width="%d" height="%d" />\n' %
                              (item.width, item.height))
     else:
         # Unhandled item type (note: asserts are skipped under -O).
         assert 0, item
     return