def test_dealstr(self):
    """Check crawler.dealstr against three sample anchor-tag fragments.

    Each fragment is expected to come back as its numeric page id
    followed by a dot (for example ``"405964."``), in input order.
    """
    anchor_fragments = [
        "<a href=\"/405964.html class=\"hello\" ",
        "<a href=\"/66482.html class=\"hello\"",
        "<a href=\"/11158.html class=\"hello\"",
    ]
    expected_ids = ["405964.", "66482.", "11158."]

    extracted = crawler.dealstr(anchor_fragments)

    self.assertEqual(extracted, expected_ids)
def test_againdeal(self):
    """Run crawler.againdeal on one live article link and check it writes output.

    The test downloads the Yahoo Taiwan society news index, collects the
    anchor tags matching the title-link pattern, converts the first one
    with crawler.dealstr, and hands it to crawler.againdeal together with
    an open ``t.json`` file. It passes when ``t.json`` is non-empty
    afterwards. Requires network access.
    """
    # A timeout keeps the test from hanging forever if the site stalls.
    firstweb = requests.get('https://tw.news.yahoo.com/society/', timeout=30)
    firstweb.encoding = 'utf-8'
    book = firstweb.text

    # m holds every anchor tag on the page that matches the title-link pattern.
    m = re.findall('<a href=\"/.*html\" class=\"title \"', book)
    # Fail with a readable message instead of an IndexError on the [0] below.
    self.assertTrue(m, 'no article links matched on the society index page')

    url_list = [crawler.dealstr(m)[0]]
    base_url = 'https://tw.news.yahoo.com/'

    try:
        with open('t.json', 'wt', encoding='utf-8') as output:
            crawler.againdeal(url_list, output, base_url)
        with open('t.json', 'rt', encoding='utf-8') as result:
            self.assertNotEqual(len(result.read()), 0)
    finally:
        # Always remove the scratch file, even when againdeal or the
        # assertion above raises, so failed runs do not leave it behind.
        if os.path.exists('t.json'):
            os.remove('t.json')
def test_againdeal(self):
    """Run crawler.againdeal on one live article link and check it writes output.

    The test downloads the Yahoo Taiwan society news index, collects the
    anchor tags matching the title-link pattern, converts the first one
    with crawler.dealstr, and hands it to crawler.againdeal together with
    an open ``t.json`` file. It passes when ``t.json`` is non-empty
    afterwards. Requires network access.
    """
    # A timeout keeps the test from hanging forever if the site stalls.
    firstweb = requests.get('https://tw.news.yahoo.com/society/', timeout=30)
    firstweb.encoding = 'utf-8'
    book = firstweb.text

    # m holds every anchor tag on the page that matches the title-link pattern.
    m = re.findall('<a href=\"/.*html\" class=\"title \"', book)
    # Fail with a readable message instead of an IndexError on the [0] below.
    self.assertTrue(m, 'no article links matched on the society index page')

    url_list = [crawler.dealstr(m)[0]]
    base_url = 'https://tw.news.yahoo.com/'

    try:
        with open('t.json', 'wt', encoding='utf-8') as output:
            crawler.againdeal(url_list, output, base_url)
        with open('t.json', 'rt', encoding='utf-8') as result:
            self.assertNotEqual(len(result.read()), 0)
    finally:
        # Always remove the scratch file, even when againdeal or the
        # assertion above raises, so failed runs do not leave it behind.
        if os.path.exists('t.json'):
            os.remove('t.json')

# Run the tests with 'python -m unittest test.py',
# or with 'nosetests test.py' from the command line.
def test_dealstr(self):
    """Verify crawler.dealstr output for a small table of anchor fragments.

    Each case pairs a raw anchor-tag fragment with the value expected
    back for it; all fragments are passed to dealstr in a single call.
    """
    cases = (
        ("<a href=\"/405964.html class=\"hello\" ", "405964."),
        ("<a href=\"/66482.html class=\"hello\"", "66482."),
        ("<a href=\"/11158.html class=\"hello\"", "11158."),
    )
    fragments = [fragment for fragment, _ in cases]
    wanted = [page_id for _, page_id in cases]

    self.assertEqual(crawler.dealstr(fragments), wanted)