def test_scraping_parameters_none_value_key_with_parameters(self):
    """Test with key and none value.

    Passes four single-key entries whose value is ``None`` and compares
    the returned list with the fully populated parameter dicts expected
    for each of them (only the ``filename`` differs between entries).
    """
    entries = [{'': None} for _ in range(4)]
    actual = scraping_parameters(data=entries)

    # Expected file names, in the same order as the input entries.
    expected_filenames = (
        'index.html',
        'index.html',
        'info-04.php',
        'info-05.php',
    )
    expected = [
        {'': {
            'method': 'GET',
            'url': '',
            'xpath': '//*',
            'filename': expected_filename}}
        for expected_filename in expected_filenames
    ]

    if self.do_debug_pprint:
        self.debug_pprint(data_in=actual, data_out=expected)
    self.assertEqual(actual, expected)
def test_scraping_parameters_filename_fixext(self):
    """Test with key and dict value: filename, fixext.

    Feeds five entries that combine ``filename`` with ``fixext`` (plus an
    optional ``method`` or ``xpath``) and compares the returned parameter
    dicts with the expected ones.
    """
    entries = [
        {'': {'filename': 'filename-14.txt'}},
        {'': {'filename': None, 'method': 'head', 'fixext': 'jpeg'}},
        {'': {
            'filename': 'filename-15.txt',
            'xpath': '//html/*',
            'fixext': 'png'}},
        {'': {
            'filename': 'filename-16.txt',
            'method': 'head',
            'fixext': 'info'}},
        {'': {
            'filename': 'filename-17.txt',
            'xpath': '//html/*',
            'fixext': 'pdf'}},
    ]
    actual = scraping_parameters(data=entries)

    # (method, xpath, filename) expected for each entry, in input order.
    expected_fields = (
        ('GET', '//*', 'filename-14.txt'),
        ('head', '//*', 'index-09.jpeg'),
        ('GET', '//html/*', 'filename-15.png'),
        ('head', '//*', ''),
        ('GET', '//html/*', 'filename-17.pdf'),
    )
    expected = [
        {'': {
            'method': method,
            'url': '',
            'xpath': xpath,
            'filename': filename}}
        for method, xpath, filename in expected_fields
    ]

    if self.do_debug_pprint:
        self.debug_pprint(data_in=actual, data_out=expected)
    self.assertEqual(actual, expected)
def test_scraping_parameters_none_value(self):
    """Test with key and none value.

    Passes entries whose keys map to ``None`` and compares the returned
    list with the expected, fully populated parameter dicts.
    """
    # NOTE(review): the last literal repeats the same key, so Python keeps
    # only one key/value pair for it -- confirm the intended keys.
    entries = [
        {'': None},
        {'': None},
        {'': None, '': None},
    ]
    actual = scraping_parameters(data=entries)

    # NOTE(review): the last expected dict also repeats its key; only the
    # final pair ('index-03.html') survives in the resulting dict.
    expected = [
        {'': {
            'method': 'GET',
            'url': '',
            'xpath': '//*',
            'filename': 'a-index-01.html'}},
        {'': {
            'method': 'GET',
            'url': '',
            'xpath': '//*',
            'filename': 'a-index-01.html'}},
        {'': {
            'method': 'GET',
            'url': '',
            'xpath': '//*',
            'filename': 'index-02.html'},
         '': {
            'method': 'GET',
            'url': '',
            'xpath': '//*',
            'filename': 'index-03.html'}},
    ]

    if self.do_debug_pprint:
        self.debug_pprint(data_in=actual, data_out=expected)
    self.assertEqual(actual, expected)
def test_scraping_loading(self):
    """Run the whole scraping pipeline and check the file it saves.

    Chains ``scraping_parameters`` -> ``scraping_extracting`` ->
    ``scraping_transformation`` -> ``scraping_loading`` for one site
    entry, asserts that the entry's ``'loading'`` status is ``'Ok'``, and
    then compares the text of the file named by ``filename`` with the
    text of the reference fixture file.
    """
    site_url = ''
    xpath = (
        '//*[@id="main"]/div/div/div'
        '//*[@class="lyricsContainer"]/div/text()'
    )
    filename = 'Pink-Floyd--Another-brick-in-the-wall.txt'

    result = scraping_loading(
        directory='.',
        data=scraping_transformation(
            data=scraping_extracting(
                data=scraping_parameters(
                    data=[{site_url: {
                        'xpath': xpath,
                        'filename': filename,
                    }, }, ]))))
    self.assertEqual(result[0][site_url]['loading'], 'Ok')

    # Read the actual file contents. Comparing the return value of
    # close() would compare None with None and could never fail.
    with open(
            'test_scraping_sites_directory-another-brick-in-the-wall.txt',
            mode='r') as in_file:
        compare_out = in_file.read()
    with open(filename, mode='r') as in_file:
        compare_in = in_file.read()

    if self.do_debug_pprint:
        self.debug_print(data_in=compare_in, data_out=compare_out)
    self.assertEqual(compare_in, compare_out)
def test_scraping_parameters_filename(self):
    """Test with key and dict value: filename.

    Feeds five entries that set ``filename`` (some also set ``method`` or
    ``xpath``) and compares the returned parameter dicts with the
    expected ones.
    """
    entries = [
        {'': {'filename': 'filename-09.txt'}},
        {'': {'filename': 'filename-10.txt', 'method': 'head'}},
        {'': {'filename': 'filename-11.txt', 'xpath': '//html/*'}},
        {'': {'filename': 'filename-12.txt', 'method': 'head'}},
        {'': {'filename': 'filename-13.txt', 'xpath': '//html/*'}},
    ]
    actual = scraping_parameters(data=entries)

    # (method, xpath, filename) expected for each entry, in input order.
    expected_fields = (
        ('GET', '//*', 'filename-09.txt'),
        ('head', '//*', 'filename-10.txt'),
        ('GET', '//html/*', 'filename-11.txt'),
        ('head', '//*', 'filename-12.txt'),
        ('GET', '//html/*', 'filename-13.txt'),
    )
    expected = [
        {'': {
            'method': method,
            'url': '',
            'xpath': xpath,
            'filename': filename}}
        for method, xpath, filename in expected_fields
    ]

    if self.do_debug_pprint:
        self.debug_pprint(data_in=actual, data_out=expected)
    self.assertEqual(actual, expected)
def test_scraping_parameters_one_data(self):
    """Check that a single non-empty entry among empty ones is accepted.

    Three empty dicts are followed by one ``{"y": None}`` entry; the
    result is expected to contain only the populated entry for ``'y'``.
    """
    entries = [{}, {}, {}, {"y": None}]
    actual = scraping_parameters(data=entries)
    expected = [
        {'y': {
            'method': 'GET',
            'url': 'y',
            'xpath': '//*',
            'filename': 'y'}},
    ]
    self.assertEqual(actual, expected)
def test_scraping_extracting_ifconfigme_encoding(self):
    """Check the extraction result of a request against its encoding.

    Builds parameters for one entry with a ``None`` value, runs the
    extraction step and compares the entry's ``'extraction result'``
    with the expected encoding string.
    """
    parameters = scraping_parameters(data=[{'': None}])
    extracted = scraping_extracting(data=parameters)
    expected = "gzip, deflate"

    if self.do_debug_pprint:
        self.debug_print(data_in=extracted, data_out=expected)

    extraction_result = extracted[0]['']['extraction result']
    self.assertEqual(extraction_result, expected)
def test_scraping_transformation_facebook_check_id(self):
    """Check the ID of the elements returned by the transformation step.

    Runs parameters -> extraction -> transformation for one entry with
    an XPath selecting ``id="facebook"``, then joins the ``id`` of every
    item in ``'transformation result'`` with ``;`` and compares it with
    the expected value.
    """
    parameters = scraping_parameters(
        data=[{'': {'xpath': '//*[@id="facebook"]'}}])
    extracted = scraping_extracting(data=parameters)
    transformed = scraping_transformation(data=extracted)
    expected = "facebook"

    if self.do_debug_pprint:
        self.debug_print(data_in=transformed, data_out=expected)

    nodes = transformed[0]['']['transformation result']
    joined_ids = ";".join([node.get("id") for node in nodes])
    self.assertEqual(joined_ids, expected)
def test_scraping_parameters_method_xpath(self):
    """Test with key and dict value: method, xpath.

    Feeds three entries that set ``method``, ``xpath`` or both, and
    compares the returned parameter dicts with the expected ones.
    """
    entries = [
        {'': {'method': 'head'}},
        {'': {'xpath': '//html/*'}},
        {'': {'method': 'put', 'xpath': '//api/*'}},
    ]
    actual = scraping_parameters(data=entries)

    # (method, xpath, filename) expected for each entry, in input order.
    expected_fields = (
        ('head', '//*', 'index-06.html'),
        ('GET', '//html/*', 'index-07.html'),
        ('put', '//api/*', 'index-08.html'),
    )
    expected = [
        {'': {
            'method': method,
            'url': '',
            'xpath': xpath,
            'filename': filename}}
        for method, xpath, filename in expected_fields
    ]

    if self.do_debug_pprint:
        self.debug_pprint(data_in=actual, data_out=expected)
    self.assertEqual(actual, expected)
def test_scraping_parameters_empty_data(self):
    """Check that an empty value of the ``data`` parameter is accepted.

    Calls ``scraping_parameters`` with no argument, with a positional
    ``None``, and with several empty keyword values; every call is
    expected to return ``None``.
    """
    # Argument omitted, then passed positionally.
    self.assertEqual(scraping_parameters(), None)
    self.assertEqual(scraping_parameters(None), None)

    # Keyword forms, in the same order and number as before: None twice,
    # an empty list, and a list holding only empty dicts.
    for empty_input in (None, None, [], [{}, {}, {}]):
        outcome = scraping_parameters(data=empty_input)
        self.assertEqual(outcome, None)