def urls_to_htmls(urls, text_params):
    """
    Crawl the given urls and return the result of ``crawl.crawl_urls``.

    The crawl is configured from ``text_params``, the dictionary of text
    preprocessing parameters. The entries ``max_depth``, ``max_links``,
    ``cache_htmls``, ``append_htmls`` and ``shelve_db`` are read from it
    and forwarded as keyword arguments of the same name; a missing entry
    surfaces as the mapping's lookup error (``KeyError`` for a dict).

    Returns whatever ``crawl.crawl_urls`` returns -- expected to be the
    list of extracted html, one item per url.
    """
    # Names are listed in the order they are handed to the crawler.
    option_names = (
        'max_depth',
        'max_links',
        'cache_htmls',
        'append_htmls',
        'shelve_db',
    )
    crawl_options = {name: text_params[name] for name in option_names}
    return crawl.crawl_urls(urls, **crawl_options)
def get_htmls(self, urls):
    """
    Crawl URL(s) and return HTML(s).

    The crawl limits ``max_depth`` and ``max_links`` are taken from
    ``self.text_params`` and forwarded to ``crawl_urls`` as keyword
    arguments; the value returned by ``crawl_urls`` is passed back
    to the caller unchanged.
    """
    params = self.text_params
    limits = {key: params[key] for key in ('max_depth', 'max_links')}
    return crawl_urls(urls, **limits)