コード例 #1
0
 def filter_images(self, imgs):
     """Keep only the images whose downloaded payload is large enough.

     Each element of ``imgs`` is asked for its ``src`` attribute via
     ``.get('src')``; elements without a non-empty ``src`` are skipped.
     A ``src`` that does not start with ``http://`` or ``https://`` is
     resolved against ``self.url`` with ``relative2absolute``.  The
     resource is then downloaded and kept only when its byte length
     exceeds ``MIN_IMG_SIZE``.

     The ``img`` elements themselves are not modified; the resolved URL
     is only recorded in the first returned list.

     Returns:
         A ``(srcs, images)`` tuple of two parallel lists: the resolved
         URLs and the corresponding elements of ``imgs`` that passed.
     """
     srcs = []
     images = []
     for img in imgs:
         src = img.get('src')
         if not src:
             # Missing or empty src: there is no image to download (an
             # empty src would otherwise resolve to the page itself).
             continue
         # Both plain and TLS URLs are already absolute.
         if not src.lower().startswith(('http://', 'https://')):
             src = relative2absolute(self.url, src)
         # Check the image size; images that are too small are dropped.
         try:
             # assumes urlopen returns a file-like response with close()
             response = urlopen(src)
             try:
                 payload = response.read()
             finally:
                 # Release the connection even if read() fails.
                 response.close()
         except (IOError, ValueError):
             # Best effort: an unreachable or malformed URL just means
             # this image is skipped.
             continue
         if len(payload) > MIN_IMG_SIZE:
             srcs.append(src)
             images.append(img)
     return (srcs, images)
コード例 #2
0
 def filter_images(self, imgs):
     """Keep only the images whose downloaded payload is large enough.

     Each element of ``imgs`` is asked for its ``src`` attribute via
     ``.get('src')``; elements without a non-empty ``src`` are skipped.
     A ``src`` that does not start with ``http://`` or ``https://`` is
     resolved against ``self.url`` with ``relative2absolute``.  The
     resource is then downloaded and kept only when its byte length
     exceeds ``MIN_IMG_SIZE``.

     The ``img`` elements themselves are not modified; the resolved URL
     is only recorded in the first returned list.

     Returns:
         A ``(srcs, images)`` tuple of two parallel lists: the resolved
         URLs and the corresponding elements of ``imgs`` that passed.
     """
     srcs = []
     images = []
     for img in imgs:
         src = img.get('src')
         if not src:
             # Missing or empty src: there is no image to download (an
             # empty src would otherwise resolve to the page itself).
             continue
         # Both plain and TLS URLs are already absolute.
         if not src.lower().startswith(('http://', 'https://')):
             src = relative2absolute(self.url, src)
         # Check the image size; images that are too small are dropped.
         try:
             # assumes urlopen returns a file-like response with close()
             response = urlopen(src)
             try:
                 payload = response.read()
             finally:
                 # Release the connection even if read() fails.
                 response.close()
         except (IOError, ValueError):
             # Best effort: an unreachable or malformed URL just means
             # this image is skipped.
             continue
         if len(payload) > MIN_IMG_SIZE:
             srcs.append(src)
             images.append(img)
     return (srcs, images)
コード例 #3
0
    def insert_images(self, block, images):
        """Insert accepted images and their surrounding nodes into ``block``.

        Walks the parse tree node by node (via ``.next``) starting at
        ``self.title`` and stopping at the last entry of
        ``block.text_list()``.  Along the way it calls ``block.insert``
        for:

        * ``img`` tags that are contained in ``images`` (their ``src`` is
          first rewritten to an absolute URL when it is not already an
          ``http://`` / ``https://`` URL),
        * ``br`` tags,
        * non-blank text nodes that are not yet in the block but come
          after an inserted image.

        Args:
            block: object providing ``text_list()`` and ``insert(i, node)``.
            images: collection of ``img`` tags that may be inserted.

        Returns:
            The same ``block`` object.  It is returned untouched when it
            has no text entries to delimit the walk.
        """
        texts = block.text_list()
        if not texts:
            # Without a last text node there is no end marker to walk to.
            return block
        end = texts[-1]
        start = self.title
        behind_img = False

        i = 0  # position in block at which the next node is inserted
        # NOTE(review): `!=` and `in` compare nodes by equality, so a node
        # equal to `end` earlier in the document stops the walk early --
        # confirm whether identity was intended.
        # Stop as well when the document ends before `end` is reached.
        while start is not None and start != end:
            if not isinstance(start, NavigableString):
                if start.name == 'img' and start in images:
                    src = start['src']
                    if not src.lower().startswith(('http://', 'https://')):
                        start['src'] = relative2absolute(self.url, src)
                    block.insert(i, start)
                    i += 1
                    behind_img = True
                elif start.name == 'br':
                    # Carry the line break over into the block.
                    block.insert(i, start)
                    i += 1
                elif start.name in BLOCK_TAGS:
                    behind_img = False
            # NavigableString with non-blank content
            elif start.string.strip():
                # Text that is already part of the body block.
                if start in block.text_list():
                    i += 1
                    behind_img = False
                # Text outside the body block that follows an image.
                elif behind_img:
                    block.insert(i, start)
                    i += 1
            start = start.next

        return block
コード例 #4
0
    def insert_images(self, block, images):
        """Insert accepted images and their surrounding nodes into ``block``.

        Walks the parse tree node by node (via ``.next``) starting at
        ``self.title`` and stopping at the last entry of
        ``block.text_list()``.  Along the way it calls ``block.insert``
        for:

        * ``img`` tags that are contained in ``images`` (their ``src`` is
          first rewritten to an absolute URL when it is not already an
          ``http://`` / ``https://`` URL),
        * ``br`` tags,
        * non-blank text nodes that are not yet in the block but come
          after an inserted image.

        Args:
            block: object providing ``text_list()`` and ``insert(i, node)``.
            images: collection of ``img`` tags that may be inserted.

        Returns:
            The same ``block`` object.  It is returned untouched when it
            has no text entries to delimit the walk.
        """
        texts = block.text_list()
        if not texts:
            # Without a last text node there is no end marker to walk to.
            return block
        end = texts[-1]
        start = self.title
        behind_img = False

        i = 0  # position in block at which the next node is inserted
        # NOTE(review): `!=` and `in` compare nodes by equality, so a node
        # equal to `end` earlier in the document stops the walk early --
        # confirm whether identity was intended.
        # Stop as well when the document ends before `end` is reached.
        while start is not None and start != end:
            if not isinstance(start, NavigableString):
                if start.name == 'img' and start in images:
                    src = start['src']
                    if not src.lower().startswith(('http://', 'https://')):
                        start['src'] = relative2absolute(self.url, src)
                    block.insert(i, start)
                    i += 1
                    behind_img = True
                elif start.name == 'br':
                    # Carry the line break over into the block.
                    block.insert(i, start)
                    i += 1
                elif start.name in BLOCK_TAGS:
                    behind_img = False
            # NavigableString with non-blank content
            elif start.string.strip():
                # Text that is already part of the body block.
                if start in block.text_list():
                    i += 1
                    behind_img = False
                # Text outside the body block that follows an image.
                elif behind_img:
                    block.insert(i, start)
                    i += 1
            start = start.next

        return block