def start_fetch(self, url, level=1):
    """Fetch one page, persist it, and return the sub-URLs found on it.

    Args:
        url: Page address to fetch.
        level: Crawl depth of this page (defaults to 1).

    Returns:
        The list of sub-URLs extracted from the fetched page (as produced
        by ``getSubTotalURLS``), whether or not the page body was empty.
    """
    self.url = url
    result = getFetcher(self.url, self.uid, self.upwd).fetchData()
    subURLs = getSubTotalURLS(result, url)
    # Only persist pages that actually returned content.
    if result:
        title = get_title(result)
        if not title:
            # Fall back to a synthetic title derived from the URL.
            title = 'Default' + self.url
        # NOTE(review): source was whitespace-mangled; the statements below
        # are assumed to belong inside the non-empty-result branch — confirm
        # against the original revision.
        result = partPage(result, 0)
        self.write_to_database(1, 1, title, result, self.url, level)
    return subURLs
def start_fetch(self, url, level=1):
    """Crawl a single page and store it, returning its extracted sub-URLs.

    Args:
        url: Address of the page to crawl.
        level: Depth at which this page sits in the crawl (default 1).

    Returns:
        Sub-URLs discovered on the page via ``getSubTotalURLS``.
    """
    self.url = url
    page_html = getFetcher(self.url, self.uid, self.upwd).fetchData()
    discovered = getSubTotalURLS(page_html, url)
    if len(page_html) != 0:
        page_title = get_title(page_html)
        if len(page_title) == 0:
            page_title = 'Default' + self.url
        # NOTE(review): original was collapsed onto one line; these
        # statements are assumed to live inside this branch — confirm.
        page_html = partPage(page_html, 0)
        self.write_to_database(1, 1, page_title, page_html, self.url, level)
    return discovered
#from django.test import TestCase
#from crawler import Crawler_client
from fetchData import getFetcher

# Create your tests here.
#client = Crawler_client("http://www.baidu.com", "t", "t")

# Smoke-test the fetcher against a known wiki page and dump the raw HTML.
# SECURITY NOTE(review): user id is hardcoded here — move credentials out
# of source control before this leaves a scratch/test file.
fetch = getFetcher(
    addr=r'https://wiki.wdf.sap.corp/wiki/display/wikisys/All+Spaces',
    uid='i076639',
    upwd='',
)
html = fetch.fetchData()
# Context manager guarantees the file is closed even if write() raises;
# explicit encoding avoids platform-dependent default codecs.
with open('data.html', 'w', encoding='utf-8') as f:
    f.write(html)
#from django.test import TestCase
#from crawler import Crawler_client
from fetchData import getFetcher

# Create your tests here.
#client = Crawler_client("http://www.baidu.com", "t", "t")

# Manual fetch check: pull one wiki page and save its HTML for inspection.
# SECURITY NOTE(review): hardcoded user id in source — externalize
# credentials (env var / config) before committing.
fetch = getFetcher(
    addr=r'https://wiki.wdf.sap.corp/wiki/display/wikisys/All+Spaces',
    uid='i076639',
    upwd='')
html = fetch.fetchData()
# `with` replaces the bare open/write/close triple so the handle cannot
# leak on an exception; encoding pinned to avoid locale-dependent output.
with open('data.html', 'w', encoding='utf-8') as f:
    f.write(html)