def parse_item(self, response):
    """Yield a single ``Article`` built from a Goose extraction of the page.

    The raw response body is handed to ``Goose().extract``; the extraction
    result's ``title`` and ``cleaned_text`` become the item's title and text.

    Args:
        response: The fetched page; its ``body`` and ``url`` attributes are
            read (presumably a Scrapy response -- confirm against caller).

    Yields:
        One ``Article`` whose ``field`` is set to ``self.name``.
    """
    extracted = Goose().extract(raw_html=response.body)
    headline = extracted.title
    main_text = extracted.cleaned_text
    yield Article(
        title=headline,
        text=main_text,
        url=response.url,
        field=self.name,
    )
def parse_item(self, response):
    """Yield a single ``Article`` using the ``ArticleExtractor`` text extractor.

    The title comes from the first match of the ``title::text`` CSS selector
    on the response; the text comes from an ``Extractor`` built over the raw
    response body.

    Args:
        response: The fetched page; its ``css`` method and ``body`` / ``url``
            attributes are used (presumably a Scrapy response -- confirm
            against caller).

    Yields:
        One ``Article`` whose ``field`` is set to ``self.name``.
    """
    page_title = response.css('title::text').extract_first()
    # Build the extractor and pull its text in one step; the extractor object
    # itself is not needed afterwards.
    main_text = Extractor(
        extractor='ArticleExtractor',
        html=response.body,
    ).getText()
    yield Article(
        title=page_title,
        text=main_text,
        url=response.url,
        field=self.name,
    )
def parse_item(self, response):
    """Yield a single ``Article`` whose text is produced by ``parse_body``.

    The title comes from the first match of the ``title::text`` CSS selector
    on the response; the text is whatever ``parse_body(response)`` returns.

    Args:
        response: The fetched page; its ``css`` method and ``url`` attribute
            are used here, and the whole object is passed to ``parse_body``
            (presumably a Scrapy response -- confirm against caller).

    Yields:
        One ``Article`` whose ``field`` is set to ``self.name``.
    """
    title_nodes = response.css('title::text')
    page_title = title_nodes.extract_first()
    main_text = parse_body(response)
    yield Article(
        title=page_title,
        text=main_text,
        url=response.url,
        field=self.name,
    )