def get_parse_candidate(crawl_candidate):
    """Build a parsing candidate for ``crawl_candidate``.

    Dispatches on whether the candidate carries raw HTML:

    * ``raw_html`` falsy (``None``, empty, ...) -> delegate to
      ``URLHelper.get_parsing_candidate`` with the candidate's URL only.
    * ``raw_html`` truthy -> delegate to ``RawHelper.get_parsing_candidate``
      with both the URL and the raw HTML.

    Args:
        crawl_candidate: Object exposing ``url`` and ``raw_html`` attributes.

    Returns:
        Whatever the selected helper's ``get_parsing_candidate`` returns.
    """
    # Truthiness (not an ``is None`` test) decides the branch, so an empty
    # ``raw_html`` is treated the same as a missing one.
    if not crawl_candidate.raw_html:
        return URLHelper.get_parsing_candidate(crawl_candidate.url)

    return RawHelper.get_parsing_candidate(
        crawl_candidate.url,
        crawl_candidate.raw_html,
    )
def get_parse_candidate(crawl_candidate):
    """Build a parsing candidate for ``crawl_candidate``.

    The source is chosen in this priority order:

    1. ``doc`` is not ``None`` -> ``SubArticle.get_parsing_candidate(doc)``.
    2. ``raw_html`` is truthy -> ``RawHelper.get_parsing_candidate(url,
       raw_html)``.
    3. Otherwise -> ``URLHelper.get_parsing_candidate(url)``.

    Args:
        crawl_candidate: Object exposing ``doc``, ``url`` and ``raw_html``
            attributes.

    Returns:
        Whatever the selected helper's ``get_parsing_candidate`` returns.
    """
    # ``doc`` is compared against None explicitly, so a falsy-but-present
    # document still takes this branch.
    if crawl_candidate.doc is not None:
        return SubArticle.get_parsing_candidate(crawl_candidate.doc)

    # ``raw_html`` is checked by truthiness: an empty value falls back to the
    # URL-only helper.
    return (
        RawHelper.get_parsing_candidate(
            crawl_candidate.url,
            crawl_candidate.raw_html,
        )
        if crawl_candidate.raw_html
        else URLHelper.get_parsing_candidate(crawl_candidate.url)
    )