def getPage(url):
    """
    Wrapper around helper.getPage that logs the URL first.

    URLs that start with "http" or "/http" are handed to helper.getPage
    unchanged; every other URL is treated as relative and prefixed with
    BASE_URL.
    """
    common.log(url)
    # NOTE(review): a "/http..." URL is passed through with its leading slash
    # intact -- confirm that helper.getPage copes with that form.
    already_absolute = url.startswith(("/http", "http"))
    target = url if already_absolute else BASE_URL + url
    return helper.getPage(target)
def getCategories():
    """
    Returns all categories in the header menu.

    Fetches the page at BESTOF_BASE_URL, takes the first <ul> with class
    "menu_1" and produces one {"title": ..., "url": ...} dict per <li>
    found in it. Titles go through common.replaceHTMLCodes and links
    through createUrl.
    """
    page = helper.getPage(BESTOF_BASE_URL)
    menu_markup = common.parseDOM(page, "ul", attrs={"class": "menu_1"})[0]

    result = []
    for item in common.parseDOM(menu_markup, "li"):
        # Both values come from the first <a> of the list item.
        raw_title = common.parseDOM(item, "a")[0]
        raw_href = common.parseDOM(item, "a", ret="href")[0]
        result.append({
            "title": common.replaceHTMLCodes(raw_title),
            "url": createUrl(raw_href),
        })
    return result
def getContracts(cur, addresses):
    """Fetch contract sources from etherscan.io and store the new ones.

    For every address that ``queries.selectAddrContracts`` reports as 0
    (not yet in the database), the source code is requested from the
    Etherscan ``getsourcecode`` endpoint. Non-empty sources are stripped of
    comments, written out via ``helper.saveToFile``, turned into an AST by
    ``helper.createAST`` and handed to ``queries.insertToContracts``.

    Args:
        cur: Database cursor, passed through to the ``queries`` functions.
        addresses: Iterable of contract addresses to look up.

    Returns:
        The number of addresses for which ``queries.insertToContracts``
        returned a truthy value.
    """
    # NOTE(review): the API key is hard-coded in the URL; consider moving it
    # to configuration so it does not live in the source tree.
    url_template = (
        'https://api.etherscan.io/api?module=contract&action=getsourcecode'
        '&address={}&apikey=RMo8wU2K53Mm'
    )
    transferred = 0
    counter = 0
    for addr in addresses:
        # Skip addresses that are already in the database.
        if queries.selectAddrContracts(cur, addr) != 0:
            continue

        page = helper.getPage(url_template.format(addr))
        # helper.getPage is expected to return None when the request fails
        # (see test_getPageExceptions); skip the address instead of crashing
        # on page.json().
        if page is None:
            continue

        # A string here means the API did not return a result record
        # (presumably an error message) -- nothing to extract.
        res = page.json()['result'][0]
        if isinstance(res, str):
            continue

        source = res['SourceCode']
        if not source:
            # No source code available for this address.
            continue

        contract = helper.removeComments(source)
        codesize = len(contract)

        # The AST is created from the file that saveToFile wrote for addr.
        helper.saveToFile(addr, contract)
        ast = helper.createAST(addr)

        if queries.insertToContracts(cur, addr, codesize, contract, ast,
                                     transferred):
            counter += 1

        # Clean up after every processed address, not only after a successful
        # insert -- presumably this removes the artifacts written by
        # saveToFile/createAST; confirm against helper.clean.
        helper.clean(addr)
    return counter
def getShows(url):
    """
    Returns all shows in a category.

    Fetches BESTOF_BASE_URL + url, takes the first <div> whose class
    contains "content_show_videos" and builds one dict per "show_box"
    <div> inside it, with the keys "title", "url", "thumbnail" and "info"
    (where "info" holds the "plot" text).
    """
    page = helper.getPage(BESTOF_BASE_URL + url)
    videos = common.parseDOM(
        page, "div",
        attrs={"class": "[^\"']*content_show_videos[^\"']*"})[0]

    result = []
    for show_box in common.parseDOM(videos, "div", attrs={"class": "show_box"}):
        # Drop the svtplay.se host prefix from the show link.
        link = common.parseDOM(
            show_box, "a", attrs={"rel": "nofollow"}, ret="href")[0].replace(
                "http://www.svtplay.se/", "")
        # Swap the medium-sized thumbnail path for the large one.
        image = common.parseDOM(show_box, "img", ret="src")[0].replace(
            "/medium/", "/large/")
        name = common.parseDOM(
            show_box, "div", attrs={"class": "image_info_1"})[0]
        # The plot text is read from the image's title attribute.
        details = {"plot": common.parseDOM(show_box, "img", ret="title")[0]}
        result.append({
            "title": name,
            "url": link,
            "thumbnail": image,
            "info": details,
        })
    return result
def test_getPageExceptions(self):
    '''
    Checks that getPage handles request exceptions:
    fetching self.noneurl must give back None.
    '''
    self.assertEqual(helper.getPage(self.noneurl), None)
def test_getPageType(self):
    '''
    Testing if getPage returns a requests Response object.
    '''
    page = helper.getPage(self.url)
    # Check against the Response class directly instead of issuing a second
    # HTTP request through requests.get just to learn its return type.
    self.assertIsInstance(page, requests.Response)
def getPage(url):
    """
    Wrapper for helper.getPage: prefixes url with SVT's base URL (BASE_URL)
    and returns whatever helper.getPage returns for the combined address.
    """
    full_url = BASE_URL + url
    return helper.getPage(full_url)