def send_request(self, url, depth=globvar.REDIRECT_MAX_DEPTH):
    """Fetch *url* and follow same-FLD redirects, recording every hop.

    Each request (initial and redirected) is logged to the crawl-history
    table via the DB pool. Returns the final RequestResult, or None when
    a redirect leaves the crawl's first-level domain or the chain is
    still redirecting after the allowed number of hops.
    """
    def _fetch_and_record(target):
        # One GET plus its crawl-history insert — always done together.
        result = request.get_request(target, redirects=globvar.ALLOW_REDIRECTS)
        self.pool.put(PoolQuery(query.insert_table_crawl_history, result.to_tuple(self.rowid)))
        return result

    req = _fetch_and_record(url)
    hops = 0
    while 300 <= req.status_code < 400:
        if hops > depth:
            break
        if not utility.same_fld(utility.get_fld(req.new_location), self.fld):
            # TODO: Log error message here.
            return None
        req = _fetch_and_record(req.new_location)
        hops += 1
    # A chain that never resolved out of the 3xx range is treated as a failure.
    return None if 300 <= req.status_code < 400 else req
def test_parse_robots(url, expected):
    """Parametrized test: parsing *url*'s robots.txt yields *expected* rules.

    Fix: the original used a bare ``except:``, which also swallows
    SystemExit and KeyboardInterrupt; narrowed to ``except Exception``.
    """
    robots = robot.Robots()
    try:
        req = request.get_request(url)
        robots.parse_robots(req.text)
    except Exception:
        # Fetch/parse failures deliberately leave `robots.rules` at its
        # default so the negative parametrized cases can assert on it.
        pass
    assert robots.rules == expected
def start_crawling(self):
    """Fetch the seed domain and return its PageCollection.

    Aborts the whole script (via quit) when the initial request fails.
    """
    seed_response = request.get_request(self.domain)
    if seed_response is None:
        # Without a seed response there is nothing to crawl — bail out.
        quit(log_error("Cannot start script."))
    collection = pages.PageCollection(
        self.domain,
        seed_response,
        self.domain,
    )
    return collection
def parse_robots(self):
    """Fetch this domain's robots.txt and feed it to the extractor's parser.

    A 404 means the site has no robots.txt, so there is nothing to parse.

    Fix: the original used a bare ``except:`` (which also traps SystemExit/
    KeyboardInterrupt) and ``logging.error`` (which drops the traceback);
    narrowed to ``except Exception`` and switched to ``logging.exception``.
    """
    logging.info(f'{self.thread_id}: Parsing robots.txt')
    url = f'{globvar.scheme}{self.fld}/robots.txt'
    try:
        req = request.get_request(url)
        if req.status_code != 404:
            self.extractor.robots.parse_robots(req.text)
    except Exception:
        # logging.exception records the full traceback alongside the message.
        logging.exception(f'Something went wrong parsing robots.txt url: {url}')
def test_get_request():
    """A GET against a known-good URL comes back with HTTP 200."""
    response = request.get_request('https://www.google.com')
    assert response.status_code == 200
def test_get_request_timed_out():
    """An unreasonably small timeout produces the sentinel status code 0."""
    target = "http://google.com"
    response = request.get_request(target, timeout=0.001)
    assert response.status_code == 0
def test_get_request_requestresult():
    """get_request wraps its response in the project's RequestResult type.

    Fix: replaced the ``type(x) == T`` anti-pattern with ``isinstance``,
    the idiomatic type check (and tolerant of subclasses).
    """
    req = request.get_request('https://www.google.com')
    assert isinstance(req, request.RequestResult)
def start_crawling(self):
    """Issue the initial request for the configured domain.

    Returns a PageCollection built from the seed response; terminates the
    script (quit) if the seed request returns nothing.
    """
    first_hit = request.get_request(self.domain)
    if first_hit is None:
        # No response for the seed URL — the crawl cannot proceed at all.
        quit(log_error("Cannot start script."))
    return pages.PageCollection(self.domain, first_hit, self.domain)
import json
from hash import apply_sha1
from util import create_json_file, update_json_file
from request import get_request, post_request

# Fetch the challenge payload and persist the raw JSON response to disk.
response = get_request()
data = json.dumps(response.json())
create_json_file(data)

json_response = response.json()
# 'numero_casas' is the Caesar shift amount; 'cifrado' is the cipher text.
house_number = json_response['numero_casas']
cipher_text = str(json_response['cifrado'])
print(f'House number: {house_number}')
print(f'Cipher text: {cipher_text}\n')

alphabet_list = [
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
    'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
    's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
]
count = 0
deciphered_text = ''
# Decrypt: shift each letter back `house_number` positions; negative indexing
# into alphabet_list wraps around, so no explicit modulo is needed.
for word in cipher_text.lower():
    if str(word).isspace() or word == '.':
        # Whitespace and periods pass through unchanged.
        deciphered_text += word
    else:
        position = alphabet_list.index(word)
        letter = alphabet_list[(position - house_number)]
        # NOTE(review): `letter` is computed but never appended in this chunk —
        # the loop body presumably continues past the visible source with
        # `deciphered_text += letter`; confirm against the full file.