def parse_factory(self):
    """Build the list of wrapped cells.

    Cells are fetched with the private ``__parse_cells`` helper and each
    one is passed through the private ``__wrap_cells`` helper.

    Returns:
        A list with one wrapped result per cell, or ``None`` when fetching
        the cells raised (the failure is logged, not re-raised).

    Note:
        Only the fetch step is guarded; an exception raised while wrapping
        a cell propagates to the caller.
    """
    try:
        cells = self.__parse_cells()
    except KeyError:
        logger.error('no cells found')
        return None
    except Exception as exc:
        # Any other failure is only recorded at info level.
        logger.info(exc)
        return None

    wrapped = []
    for cell in cells:
        wrapped.append(self.__wrap_cells(cell))
    return wrapped
def parse_body(self, *args, **kwargs):
    """Parse the article content.

    Looks up the ``div`` matching ``self.CONTENT`` in ``self.soup`` and
    concatenates the string form of each of its children.

    Returns:
        body - the article content as a single string, HTML tags kept.
        If the lookup fails for any reason, the error is logged and the
        placeholder '文章为空' ("article is empty") is returned instead.
    """
    try:
        container = self.soup.find(name='div', attrs=self.CONTENT)
        children = container.contents
    except Exception as exc:
        logger.error(exc)
        return '文章为空'

    # Stringify every child node so markup is preserved in the output.
    return ''.join(str(child) for child in children)
def parse_tag(self, *args, **kwargs):
    """Return the tag, derived from ``parse_author()``.

    The value returned by ``self.parse_author()`` is sliced from index 5
    onward (the first five items are dropped).

    Returns:
        The sliced value, or '佚名' ("anonymous") when ``parse_author`` or
        the slicing raises; the error is logged in that case.
    """
    try:
        author = self.parse_author()
        # NOTE(review): presumably skips a fixed 5-character prefix - confirm.
        return author[5:]
    except Exception as exc:
        logger.error(exc)
        return '佚名'
def parse_date(self, *args, **kwargs):
    """Parse the article date.

    Looks up the ``span`` matching ``self.DATE`` in ``self.soup`` and
    returns its ``.string`` converted to ``str``.

    Returns:
        The date text, or 'N/A' when the lookup raises (the error is
        logged) or when the located node has no ``.string`` value.
    """
    try:
        date_text = self.soup.find(name='span', attrs=self.DATE).string
    except Exception as e:
        logger.error(e)
        return 'N/A'

    # A node's ``.string`` can be None; str(None) would otherwise leak the
    # literal text 'None' to callers as if it were a date.
    if date_text is None:
        return 'N/A'
    return str(date_text)