예제 #1
0
def urls_to_htmls(urls, text_params):
    """Crawl the given urls using the crawl settings held in text_params.

    The following entries are read from ``text_params`` and forwarded, under
    the same names, as keyword arguments to ``crawl.crawl_urls``:
    ``max_depth``, ``max_links``, ``cache_htmls``, ``append_htmls`` and
    ``shelve_db``. A missing entry raises ``KeyError``.

    Returns whatever ``crawl.crawl_urls`` returns (intended to be the
    extracted html for each url).
    """
    forwarded = (
        'max_depth',
        'max_links',
        'cache_htmls',
        'append_htmls',
        'shelve_db',
    )
    # Look the settings up in a fixed order and pass them through unchanged.
    return crawl.crawl_urls(
        urls, **{name: text_params[name] for name in forwarded})
예제 #2
0
 def get_htmls(self, urls):
     """Crawl the given URL(s) and return the result of ``crawl_urls``.

     The ``max_depth`` and ``max_links`` entries of ``self.text_params`` are
     forwarded to ``crawl_urls`` as keyword arguments of the same names.
     """
     return crawl_urls(
         urls,
         **{key: self.text_params[key] for key in ('max_depth', 'max_links')}
     )