def test__init_threads(self):
    """Check the thread count and url split chosen by ThreadedRequestHandler."""
    url = 'http://example.org'

    def build(url_count, requested_threads):
        # Every scenario shares the same request and error configuration.
        return ThreadedRequestHandler([url] * url_count, RequestData(GET),
                                      RequestErrorData(), requested_threads)

    # Construction must not raise; threads and handlers are created in step
    threaded = build(5, 5)
    self.assertEqual(threaded.thread_num, len(threaded.threads))
    self.assertEqual(5, len(threaded.handlers))

    # No exception scenarios are exercised for this method.

    # (urls given, threads requested, expected thread_num,
    #  expected number of urls handed to the first handler)
    scenarios = (
        (3, 5, 3, 1),
        (3, 2, 2, 2),
        (148, 10, 10, 15),
    )
    for url_count, requested, expected_threads, expected_first_chunk in scenarios:
        threaded = build(url_count, requested)
        self.assertEqual(expected_threads, threaded.thread_num)
        self.assertEqual(expected_first_chunk,
                         len(threaded.handlers[0].url_list))
def test___init__(self):
    """RequestData must accept each entry of VALID_METHODS and reject 'a'."""
    # Building with any valid method is expected to succeed silently
    for valid_method in VALID_METHODS:
        RequestData(valid_method)

    # A method outside VALID_METHODS is expected to raise InvalidMethod
    with self.assertRaises(InvalidMethod):
        RequestData('a')
def _api_call(url, client_id, extra=None):
    """
    Make a GET request to an api url and return its json payload.

    :param url: string, the endpoint; the client_id query is appended to it
    :param client_id: value interpolated into the client_id query parameter
    :param extra: optional string appended verbatim after the client_id
    :return: the .json() result of the first (and only) response
    """
    request_url = url + '?client_id=%s' % client_id
    if extra:
        request_url += extra

    # allow_errors=False: the handler is configured not to tolerate errors
    handler = RequestHandler([request_url], RequestData(GET),
                             RequestErrorData(allow_errors=False))
    handler.run()
    return handler.responses[0].json()
def __init__(self, url, output_directory=None):
    """
    Takes care of handling path and file checks and creations, as well as
    checking if there's already valid data saved about this domain.

    :param url: str
    :param output_directory: str (defaults to ./output)
    :raises InvalidFilePath: if output_directory is given but is not an
        existing directory
    :raises BrokenJsonFile: if a data.json already saved for this url
        cannot be decoded
    """
    # Instantiate instance vars
    self.url = url
    self.data = {}
    self.loaded_flag = False
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0'
    }
    self.requester = RequestHandler([''],
                                    RequestData(GET, headers=headers),
                                    RequestErrorData(allow_errors=False))

    # output_directory checks
    if output_directory:
        # A caller-supplied directory is never created, only validated
        if not os.path.isdir(output_directory):
            raise InvalidFilePath(output_directory)
    else:
        output_directory = os.getcwd() + '/output'
        # exist_ok avoids the check-then-create race of isdir() + mkdir()
        os.makedirs(output_directory, exist_ok=True)

    # Url specific directory check
    url_folder_path = output_directory + '/' + url_to_filename(self.url)
    os.makedirs(url_folder_path, exist_ok=True)

    # Load previously saved data, if any
    data_file_path = url_folder_path + '/data.json'
    if os.path.isfile(data_file_path):
        try:
            # Handle bad json
            with open(data_file_path, 'r') as json_file:
                self.data = json.load(json_file)
        except json.decoder.JSONDecodeError as e:
            # Chain the decode error so the original traceback is kept
            raise BrokenJsonFile(repr(e)) from e
        self.loaded_flag = True

    self.filepath = url_folder_path
def _resolve(resolve_url, client_id):
    """
    Send a resolve request and hand back its json payload.

    :param resolve_url: string, what to resolve
    :param client_id: string, a valid client_id
    :return: dict, the .json response to the resolve request
    """
    endpoint = 'http://api.soundcloud.com/resolve?url=%s&client_id=%s' % (
        resolve_url, client_id)

    # allow_errors=False: the handler is configured not to tolerate errors
    handler = RequestHandler([endpoint], RequestData(GET),
                             RequestErrorData(allow_errors=False))
    handler.run()
    return handler.responses[0].json()
def test__request_wrapper(self):
    """Exercise RequestHandler._request_wrapper with valid and failing urls."""
    with open('test_expected_request_response', 'r') as fixture:
        expected_body = fixture.read()

    good_url = 'http://example.org'
    malformed_url = 'asaff'
    connect_failure_url = 'http://examasfafgple.org'

    handler = RequestHandler([], RequestData(GET), RequestErrorData())

    # A plain request to a valid url must not raise
    handler._request_wrapper(good_url)

    # Each bad url is expected to raise its matching exception
    with self.assertRaises(InvalidURL):
        handler._request_wrapper(malformed_url)
    with self.assertRaises(ConnectivityError):
        handler._request_wrapper(connect_failure_url)

    # The response text must match the stored fixture
    self.assertEqual(expected_body, handler._request_wrapper(good_url).text)
def test_do_threads(self):
    """Run a threaded batch of six good urls plus one failing url."""
    good_url = 'http://example.org'
    failing_url = 'http://examasfafgple.org'
    with open('test_expected_request_response', 'r') as fixture:
        expected_body = fixture.read()

    # Running the batch must not raise even though one url fails
    url_batch = [good_url] * 6 + [failing_url]
    threaded = ThreadedRequestHandler(url_batch, RequestData(GET),
                                      RequestErrorData(), 5)
    threaded.do_threads()

    # No exception scenarios are exercised for this method.

    # Every collected response matches the fixture; the failing url is
    # expected to be the single recorded error
    for response in threaded.responses:
        self.assertEqual(expected_body, response.text)
    self.assertEqual(1, len(threaded.errors))
def test__handle_url(self):
    """Exercise RequestHandler.run in both raising and error-collecting modes."""
    with open('test_expected_request_response', 'r') as fixture:
        expected = fixture.read()
    body_prefix = expected[:5]

    good_url = 'http://example.org'
    connect_failure_url = 'http://examasfafgple.org'

    def run_collecting(urls, **error_options):
        # Build a handler with the given RequestErrorData options, run it,
        # and return it so its responses/errors can be inspected.
        handler = RequestHandler(urls, RequestData(GET),
                                 RequestErrorData(**error_options))
        handler.run()
        return handler

    # Test nothing else raises
    run_collecting([good_url] * 3)

    # Test exceptions: with allow_errors=False each failure kind must raise.
    # All handlers are constructed before any of them is run.
    raising_cases = [
        (ConnectivityError,
         RequestHandler([connect_failure_url], RequestData(GET),
                        RequestErrorData(allow_errors=False))),
        (InvalidStatusCode,
         RequestHandler([good_url], RequestData(GET),
                        RequestErrorData(allow_errors=False,
                                         expected_status_codes=[400]))),
        (NoValidationString,
         RequestHandler([good_url], RequestData(GET),
                        RequestErrorData(allow_errors=False,
                                         expected_validation_str='afafplfa'))),
        (ContainsErrorString,
         RequestHandler([good_url], RequestData(GET),
                        RequestErrorData(allow_errors=False,
                                         expected_error_str=body_prefix))),
    ]
    for expected_exception, strict_handler in raising_cases:
        with self.assertRaises(expected_exception):
            strict_handler.run()

    # Test it works as it should
    # # With one
    handler = run_collecting([good_url])
    self.assertEqual(1, len(handler.responses))
    self.assertEqual(0, len(handler.errors))

    # # With many
    mixed_urls = ([good_url] * 4) + ([connect_failure_url] * 2) + [good_url]
    handler = run_collecting(mixed_urls)
    self.assertEqual(5, len(handler.responses))
    self.assertEqual(2, len(handler.errors))
    for recorded in handler.errors:
        self.assertEqual(ConnectivityError, recorded['error'])

    # # Status code error check
    handler = run_collecting([good_url], expected_status_codes=[1900])
    self.assertEqual(1, len(handler.errors))
    self.assertEqual(InvalidStatusCode, handler.errors[0]['error'])

    # # Validation Str error check
    handler = run_collecting([good_url], expected_validation_str=body_prefix)
    self.assertEqual(0, len(handler.errors))

    handler = run_collecting([good_url], expected_validation_str='alf')
    self.assertEqual(1, len(handler.errors))
    self.assertEqual(NoValidationString, handler.errors[0]['error'])

    # # Error Str error check
    handler = run_collecting([good_url], expected_error_str='aogfka')
    self.assertEqual(0, len(handler.errors))

    handler = run_collecting([good_url], expected_error_str=body_prefix)
    self.assertEqual(1, len(handler.errors))
    self.assertEqual(ContainsErrorString, handler.errors[0]['error'])