Beispiel #1
0
 def test_handleHTML_regex_unit_code_test3(self):
     """unit_regex must not match the text "year 2015"."""
     parser = handleHTML.parse()
     result = parser.unit_regex.match("year 2015")
     # assertFalse fails on exactly the inputs the old if/else did, but
     # reports the offending value instead of a bare failure.
     self.assertFalse(result, 'unit_regex unexpectedly matched "year 2015"')
Beispiel #2
0
 def test_handleHTML_regex_unit_code_test2(self):
     """unit_regex must match the text "pys101"."""
     parser = handleHTML.parse()
     result = parser.unit_regex.match("pys101")
     # assertTrue fails on exactly the inputs the old if/else did, but
     # produces a descriptive failure message.
     self.assertTrue(result, 'unit_regex did not match "pys101"')
Beispiel #3
0
 def test_handleHTML_regex_heading_tag(self):
     """heading_tag_regex must match the tag name "h5"."""
     parser = handleHTML.parse()
     result = parser.heading_tag_regex.match("h5")
     # Same pass/fail condition as the former if/else, with a useful message.
     self.assertTrue(result, 'heading_tag_regex did not match "h5"')
Beispiel #4
0
 def test_handleHTML_regex_test_double_slash(self):
     """double_slash_regex must match a URL that starts with "//"."""
     parser = handleHTML.parse()
     url = "//handbooks.uwa.edu.au/units/unitdetails?code=cits3200"
     result = parser.double_slash_regex.match(url)
     # Same pass/fail condition as the former if/else, with a useful message.
     self.assertTrue(result, "double_slash_regex did not match " + url)
Beispiel #5
0
 def test_handleHTML_regex_test_http(self):
     """http_regex must match a URL that starts with "http://"."""
     parser = handleHTML.parse()
     url = "http://handbooks.uwa.edu.au/units/unitdetails?code=cits3200"
     result = parser.http_regex.match(url)
     # Same pass/fail condition as the former if/else, with a useful message.
     self.assertTrue(result, "http_regex did not match " + url)
#


from urllib.request import urlopen
from urllib.error import  URLError
import handleHTML
from Stack import Stack


# Module-level crawler state.

# URLs that have already been visited.
visited = set()
# URLs still to visit, held in a Stack.
stack = Stack()
# URLs currently on `stack`; recursiveload adds a URL here when it pushes it
# and removes it when it pops it.
in_stack = set()
# Parser object used to manipulate HTML.
parser = handleHTML.parse()
# NOTE(review): the re_init call below is disabled -- confirm whether it is
# still needed or can be deleted.
#parser.re_init(stack, visited, in_stack)


#recursively load URL via stack

def recursiveload(url, dom, db):
    database = db
    count = 0
    stack.push(url)
    in_stack.add(url)
    while stack.isEmpty() != True:
        try:
            newurl = stack.pop()
            in_stack.remove(newurl)
            if dom in newurl:
Beispiel #7
0
 def test_handleHTML_get_domain(self):
     """get_domain() yields "handbooks.uwa.edu.au" for a handbook unit URL."""
     html_parser = handleHTML.parse()
     # get_domain() reads the URL from the parser's `url` attribute.
     html_parser.url = "http://handbooks.uwa.edu.au/units/unitdetails?code=cits3200"
     self.assertEqual(html_parser.get_domain(), "handbooks.uwa.edu.au")
Beispiel #8
0
 def test_handleHTML_handle_end_tag(self):
     """After feeding "<body></body>", body_tag_open must be falsy."""
     parser = handleHTML.parse()
     parser.feed("<body></body>")
     # assertFalse(x) passes exactly when assertTrue(not x) did, but on
     # failure it reports the attribute's value rather than "False is not true".
     self.assertFalse(parser.body_tag_open)
Beispiel #9
0
 def test_handleHTML_handle_start_tag(self):
     """After feeding an opening "<body>" tag, body_tag_open must be truthy."""
     html_parser = handleHTML.parse()
     html_parser.feed("<body>")
     body_open = html_parser.body_tag_open
     self.assertTrue(body_open)