예제 #1
0
 def test_dealstr(self):
     # Feed three sample anchor fragments to crawler.dealstr and compare the
     # returned list against the expected tokens, in the same order.
     anchor_fragments = [
         "<a href=\"/405964.html class=\"hello\"  ",
         "<a href=\"/66482.html class=\"hello\"",
         "<a href=\"/11158.html class=\"hello\"",
     ]
     expected_tokens = ["405964.", "66482.", "11158."]

     extracted = crawler.dealstr(anchor_fragments)

     self.assertEqual(extracted, expected_tokens)
    def test_againdeal(self):
        # Smoke test against the live site: download the society index page,
        # hand the first entry produced by crawler.dealstr to crawler.againdeal,
        # and check that the output file is not empty afterwards.
        response = requests.get('https://tw.news.yahoo.com/society/')
        response.encoding = 'utf-8'
        page_html = response.text

        # Every match of the anchor pattern found in the downloaded page.
        anchors = re.findall('<a href=\"/.*html\" class=\"title \"', page_html)

        # Only the first extracted entry is crawled.
        url_list = [crawler.dealstr(anchors)[0]]
        base_url = 'https://tw.news.yahoo.com/'
        scratch_path = 't.json'

        try:
            with open(scratch_path, "wt", encoding='utf-8') as output:
                crawler.againdeal(url_list, output, base_url)

            with open(scratch_path, 'rt', encoding='utf-8') as result:
                self.assertNotEqual(len(result.read()), 0)
        finally:
            # Clean up even when againdeal raises or the assertion fails, so a
            # failed run does not leave t.json behind in the working directory.
            if os.path.exists(scratch_path):
                os.remove(scratch_path)
예제 #3
0
    def test_againdeal(self):
        # Live-site smoke test: fetch the society index page, pass the first
        # entry returned by crawler.dealstr to crawler.againdeal, then verify
        # that the file it was given to write into has non-zero length.
        firstweb = requests.get('https://tw.news.yahoo.com/society/')
        firstweb.encoding = 'utf-8'
        book = firstweb.text

        # m holds every match of the anchor pattern in the page text.
        m = re.findall('<a href=\"/.*html\" class=\"title \"', book)

        url_list = [crawler.dealstr(m)[0]]
        base_url = 'https://tw.news.yahoo.com/'
        json_path = 't.json'

        try:
            with open(json_path, "wt", encoding='utf-8') as output:
                crawler.againdeal(url_list, output, base_url)

            with open(json_path, 'rt', encoding='utf-8') as result:
                written = result.read()
            self.assertNotEqual(len(written), 0)
        finally:
            # The scratch file must be removed on every exit path; previously a
            # failing assertion or an exception skipped the cleanup entirely.
            if os.path.exists(json_path):
                os.remove(json_path)


# Run the tests with 'python -m unittest test.py',
# or with 'nosetests test.py' from the command line.
 def test_dealstr(self):
     # crawler.dealstr is given three sample anchor fragments; the assertion
     # pins the exact list it is expected to return for them.
     samples = [
         "<a href=\"/405964.html class=\"hello\"  ",
         "<a href=\"/66482.html class=\"hello\"",
         "<a href=\"/11158.html class=\"hello\"",
     ]
     result = crawler.dealstr(samples)
     self.assertEqual(result, ["405964.", "66482.", "11158."])