def start_fetch(self, url, level = 1):
	"""Fetch *url* and, when content comes back, persist it to the database.

	Records the URL on the instance, downloads the page via getFetcher
	(presumably using the stored uid/upwd credentials — confirm in
	fetchData), and extracts the sub-URLs found on the page. Non-empty
	pages are stored with their title (or a 'Default'-prefixed fallback)
	at the given crawl *level*. Returns the collected sub-URLs either way.
	"""
	self.url = url
	page = getFetcher(self.url, self.uid, self.upwd).fetchData()
	sub_urls = getSubTotalURLS(page, url)
	if len(page) != 0:
		page_title = get_title(page)
		if len(page_title) == 0:
			# No title found in the page — synthesize one from the URL.
			page_title = 'Default' + self.url
		# result = self.convert_html_to_content(result)
		body = partPage(page, 0)
		self.write_to_database(1, 1, page_title, body, self.url, level)
		# self.do_syscmd_reindexer()
	return sub_urls
 def start_fetch(self, url, level=1):
     """Fetch *url*, store the page in the database when non-empty, and
     return the sub-URLs discovered on it.

     NOTE(review): this is a space-indented duplicate of the tab-indented
     start_fetch earlier in this file — one copy should likely be removed.
     """
     self.url = url
     # Download the page; presumably uid/upwd are credentials used by the
     # fetcher — TODO confirm against getFetcher/fetchData.
     result = getFetcher(self.url, self.uid, self.upwd).fetchData()
     # Collect sub-URLs from the fetched content (done before the empty
     # check so they are returned even when nothing is stored).
     subURLs = getSubTotalURLS(result, url)
     if (len(result) != 0):
         title = get_title(result)
         # Fall back to a synthesized title when the page has none.
         if (len(title) == 0): title = 'Default' + self.url
         # result = self.convert_html_to_content(result)
         result = partPage(result, 0)
         self.write_to_database(1, 1, title, result, self.url, level)
         # self.do_syscmd_reindexer()
     return subURLs
Example #3
0
#from django.test import TestCase
#from crawler import Crawler_client
from fetchData import getFetcher

# Create your tests here.


#client = Crawler_client("http://www.baidu.com", "t", "t")

# Fetch the wiki page and dump the raw HTML to data.html for inspection.
fetch = getFetcher(addr = r'https://wiki.wdf.sap.corp/wiki/display/wikisys/All+Spaces', uid = 'i076639', upwd ='')
html = fetch.fetchData()

# Use a context manager so the file is closed even if the write raises,
# and pin the encoding instead of relying on the platform default.
with open('data.html', 'w', encoding='utf-8') as f:
    f.write(html)
Example #4
0
#from django.test import TestCase
#from crawler import Crawler_client
from fetchData import getFetcher

# Create your tests here.

#client = Crawler_client("http://www.baidu.com", "t", "t")

# Fetch the wiki page and dump the raw HTML to data.html for inspection.
fetch = getFetcher(
    addr=r'https://wiki.wdf.sap.corp/wiki/display/wikisys/All+Spaces',
    uid='i076639',
    upwd='')
html = fetch.fetchData()

# Use a context manager so the file is closed even if the write raises,
# and pin the encoding instead of relying on the platform default.
with open('data.html', 'w', encoding='utf-8') as f:
    f.write(html)