def test_try_to_get_a_hidden_pdf_at_root_of_cpd(mock_findcpd, mock_findsibl,
                                                mock_tryhidden,
                                                mock_writehidden):
    """Check ``try_to_get_a_hidden_pdf_at_root_of_cpd`` in two scenarios.

    1. The (pointer, filetype) pair reported by ``mock_findcpd`` matches one
       of the sibling files: neither the fetch stub nor the write stub may
       be called.
    2. The pair does not match any sibling file: the fetch stub is called
       with the pair and its result is handed to the write stub together
       with the ``Cpd`` directory.
    """
    sibling_names = ['imag1.img', 'imag2.img', 'imag3.img']

    def build_scrapealias():
        # Fresh instance with its collaborators replaced by the mocks.
        instance = scrape_cDM.ScrapeAlias('_', '_')
        instance.alias_dir = 'fake/filepath'
        instance.find_sibling_files = mock_findsibl
        instance.try_getting_hidden_pdf = mock_tryhidden
        instance.write_hidden_pdf_if_a_binary = mock_writehidden
        return instance

    # Scenario 1: binary already on disk ('imag1' + 'img' is a sibling).
    scrapealias = build_scrapealias()
    mock_findcpd.return_value = ('imag1', 'img')
    mock_findsibl.return_value = list(sibling_names)
    scrapealias.try_to_get_a_hidden_pdf_at_root_of_cpd('fakefile')
    mock_findcpd.assert_called_with('fake/filepath/Cpd', 'fakefile')
    mock_findsibl.assert_called_with('fakefile')
    assert not mock_tryhidden.called
    assert not mock_writehidden.called

    # Scenario 2: binary not on disk ('imag1' + 'other' is not a sibling).
    scrapealias = build_scrapealias()
    mock_findcpd.return_value = ('imag1', 'other')
    mock_findsibl.return_value = list(sibling_names)
    mock_tryhidden.return_value = 'imag_binary'
    scrapealias.try_to_get_a_hidden_pdf_at_root_of_cpd('fakefile')
    mock_findcpd.assert_called_with('fake/filepath/Cpd', 'fakefile')
    mock_findsibl.assert_called_with('fakefile')
    mock_tryhidden.assert_called_with('imag1', 'other')
    mock_writehidden.assert_called_with(
        'imag_binary', 'fake/filepath/Cpd', 'imag1', 'other')
def test_write_hidden_pdf_if_a_binary__xml_received_instead_of_binary_failure(
        mock_API, binary_decodes_fixture):
    """``write_hidden_pdf_if_a_binary`` returns False for the fixture payload.

    NOTE(review): ``binary_decodes_fixture`` is defined outside this view; it
    presumably carries UTF-8 encoded XML rather than real PDF bytes, along
    the lines of the sample below -- confirm against the fixture.

        <?xml version="1.0" encoding="utf-8"?><cpd><type>Document-PDF</type>
        <page><pagetitle>Page 1</pagetitle><pagefile>3605.pdfpage</pagefile>
        <pageptr>3604</pageptr></page></cpd>
    """
    scrapealias = scrape_cDM.ScrapeAlias('_', '_')
    outcome = scrapealias.write_hidden_pdf_if_a_binary(
        binary_decodes_fixture,
        'imag_filepath',
        'imag_pointer',
        'imag_filetype',
    )
    assert outcome is False
def test_find_sibling_files():
    """``find_sibling_files`` returns every file listed beside the probe.

    ``tree_snapshot`` is seeded with three entries, each ending in a list of
    file names; looking up one name must yield the full list it belongs to
    (compared as a set, so order is not checked).
    """
    scrapealias = scrape_cDM.ScrapeAlias('_', '_')
    scrapealias.tree_snapshot = [
        ['imag_a', ['imagd_1'], ['a1', 'a2', 'a3']],
        [['imag_b'], [], ['b1', 'b2']],
        [['imag_c'], [], ['c1', 'c2', 'c3', 'c4']],
    ]

    expectations = (
        ('a1', {'a1', 'a2', 'a3'}),
        ('b2', {'b1', 'b2'}),
        ('c3', {'c1', 'c2', 'c3', 'c4'}),
    )
    for probe, expected_siblings in expectations:
        found = scrapealias.find_sibling_files(probe)
        assert set(found) == expected_siblings
def test_try_getting_hidden_pdf(mock_API):
    """``try_getting_hidden_pdf`` returns the binary, or False on HTTPError.

    First the API stub returns a value and the method must pass it through;
    then the stub raises ``urllib.error.HTTPError`` and the method must
    return False.
    """
    # Import the submodule explicitly: a bare ``import urllib`` only makes
    # ``urllib.error`` available if some other module already loaded it.
    import urllib.error

    scrapealias = scrape_cDM.ScrapeAlias('imag_path', 'imag_alias')

    mock_API.retrieve_binary.return_value = 'actual_imag_binary'
    fetched = scrapealias.try_getting_hidden_pdf(
        'imag_pointer', 'imag_filetype')
    assert fetched == 'actual_imag_binary'

    # Build the error with keyword arguments so each value lands in the
    # slot HTTPError(url, code, msg, hdrs, fp) expects.
    mock_API.retrieve_binary.side_effect = urllib.error.HTTPError(
        url='imag',
        code=42,
        msg='imag_exception occurredd',
        hdrs=None,
        fp=None,
    )
    assert scrapealias.try_getting_hidden_pdf(
        'imag_pointer', 'imag_filetype') is False
def test_calculate_chunks(mock_count_root_objects):
    """``calculate_chunks`` yields the expected count for each table row.

    Each row is (total root objects, chunk size, expected chunk count); the
    total is injected through the stubbed ``count_root_objects``.
    """
    scrapealias = scrape_cDM.ScrapeAlias('_', '_')
    table = [
        (899, 100, 9),
        (900, 100, 10),
        (999, 1000, 1),
        (1, 1, 2),
    ]
    for root_total, chunk_size, expected_chunks in table:
        scrapealias.count_root_objects.return_value = root_total
        actual_chunks = scrapealias.calculate_chunks(chunk_size)
        assert actual_chunks == expected_chunks
    # The stub must have been invoked without arguments.
    mock_count_root_objects.assert_called_with()
def test_main_loop(mock_docpd, mock_doroot, mock_docoll):
    """``main`` calls each of the three stage methods with no arguments."""
    scrapealias = scrape_cDM.ScrapeAlias('_', '_')

    # Map each stage method name to the stub that replaces it.
    stage_stubs = {
        'do_collection_level_metadata': mock_docoll,
        'do_root_level_objects': mock_doroot,
        'do_compound_objects': mock_docpd,
    }
    for method_name, stub in stage_stubs.items():
        setattr(scrapealias, method_name, stub)

    # Nothing may have run before main() is invoked.
    for stub in stage_stubs.values():
        assert not stub.called

    scrapealias.main()

    for stub in stage_stubs.values():
        stub.assert_called_with()
def test_parse_children_of_cpd(mock_arepointers, mock_ETparse,
                               ETparse_fixture):
    """Check ``parse_children_of_cpd`` for both pdfpage outcomes.

    When ``are_child_pointers_pdfpages`` reports True the method must return
    False; when it reports False the method must return
    'imag_children_pointers_list'.  The XML parser is replaced by
    ``mock_ETparse`` for the duration of the test.
    """
    scrapealias = scrape_cDM.ScrapeAlias('_', '_')
    scrapealias.alias_dir = 'imag_dir'
    scrapealias.are_child_pointers_pdfpages = mock_arepointers

    mock_ETparse.return_value = ETparse_fixture
    mock_ETparse.findall = ETparse_fixture

    # Swap the parser in by hand, but remember the previous attribute so
    # the replacement cannot leak into other tests.
    previous_parse = scrape_cDM.ET.parse
    scrape_cDM.ET.parse = mock_ETparse
    try:
        mock_arepointers.return_value = True
        assert scrapealias.parse_children_of_cpd('imag_parent') is False
        mock_ETparse.assert_called_with('imag_dir/Cpd/imag_parent_cpd.xml')
        mock_ETparse.findall.assert_called_with(
            'imag_elem', 'imag_parent_cpd.xml')

        mock_arepointers.return_value = False
        assert scrapealias.parse_children_of_cpd(
            'imag_parent') == 'imag_children_pointers_list'
    finally:
        scrape_cDM.ET.parse = previous_parse
def test_are_child_pointers_pdfpages(mock_try_to_get, mock_has_pdfpage):
    """Check ``are_child_pointers_pdfpages`` against the pdfpage stub.

    * pdfpage stub returns False: result is False and the hidden-pdf stub is
      never called.
    * pdfpage stub returns True: result is True and the hidden-pdf stub is
      called with the file name, whatever that stub itself returns.
    """
    scrapealias = scrape_cDM.ScrapeAlias('_', '_')
    scrapealias.try_to_get_a_hidden_pdf_at_root_of_cpd = mock_try_to_get
    # NOTE(review): mock_has_pdfpage is not installed here; presumably it is
    # patched over scrape_cDM.has_pdfpage_elems outside this view -- confirm.
    call_args = ('imag_list', 'imag_filename')

    # No pdfpage elements among the children.
    mock_has_pdfpage.return_value = False
    assert scrapealias.are_child_pointers_pdfpages(*call_args) is False
    mock_has_pdfpage.assert_called_with('imag_list')
    assert not mock_try_to_get.called

    # pdfpage elements present; the hidden-pdf attempt fails, then succeeds.
    for hidden_pdf_result in (False, True):
        mock_has_pdfpage.return_value = True
        mock_try_to_get.return_value = hidden_pdf_result
        assert scrapealias.are_child_pointers_pdfpages(*call_args) is True
        mock_has_pdfpage.assert_called_with('imag_list')
        mock_try_to_get.assert_called_with('imag_filename')
def test_do_collection_level_metadata(mock_API, mock_os):
    """Check ``do_collection_level_metadata`` with and without cached files.

    * All four collection files already listed in the directory: none of the
      retrieve calls may be made.
    * Empty directory listing: every retrieve call is made with the alias,
      three XML files and one JSON file are written, and the last write of
      each kind is the 'Collection_Fields' file.
    """
    mock_os.makedirs.return_value = True
    mock_API.retrieve_collection_total_recs.return_value = 'total_recs'
    mock_API.retrieve_collection_metadata.return_value = 'coll_metadata'
    mock_API.retrieve_collection_fields_json.return_value = 'fields_json'
    # Attribute name spelled correctly so the return value is actually
    # configured on the stub the assertions below inspect.
    mock_API.retrieve_collection_fields_xml.return_value = 'fields_xml'

    scrapealias = scrape_cDM.ScrapeAlias('imag_path', 'imag_alias')
    scrapealias.alias_dir = 'imag_filepath'

    # Scenario 1: everything is already on disk.
    mock_os.listdir.return_value = (
        'Collection_TotalRecs.xml',
        'Collection_Metadata.xml',
        'Collection_Fields.json',
        'Collection_Fields.xml',
    )
    scrapealias.do_collection_level_metadata()
    assert not mock_API.retrieve_collection_total_recs.called
    assert not mock_API.retrieve_collection_metadata.called
    assert not mock_API.retrieve_collection_fields_json.called
    assert not mock_API.retrieve_collection_fields_xml.called

    # Scenario 2: nothing on disk.  Use a real empty tuple; ``('')`` is
    # just an empty string.
    mock_os.listdir.return_value = ()
    scrapealias.do_collection_level_metadata()
    mock_API.retrieve_collection_total_recs.assert_called_with('imag_alias')
    mock_API.retrieve_collection_metadata.assert_called_with('imag_alias')
    mock_API.retrieve_collection_fields_json.assert_called_with('imag_alias')
    mock_API.retrieve_collection_fields_xml.assert_called_with('imag_alias')

    assert mock_API.write_xml_to_file.call_count == 3
    assert mock_API.write_json_to_file.call_count == 1
    # assert_called_with inspects the most recent call of each writer.
    mock_API.write_json_to_file.assert_called_with(
        'fields_json', 'imag_filepath', 'Collection_Fields')
    mock_API.write_xml_to_file.assert_called_with(
        'fields_xml', 'imag_filepath', 'Collection_Fields')
def test_count_root_objects(mock_ET, total_recs_etree_fixture):
    """``count_root_objects`` reads the total-records XML and returns 20.

    The parser stub hands back ``total_recs_etree_fixture`` and must have
    been asked for ``Collection_TotalRecs.xml`` inside ``alias_dir``.
    """
    mock_ET.parse.return_value = total_recs_etree_fixture

    scrapealias = scrape_cDM.ScrapeAlias('_', '_')
    scrapealias.alias_dir = 'imag_alias_dir'

    root_total = scrapealias.count_root_objects()

    assert root_total == 20
    mock_ET.parse.assert_called_with(
        'imag_alias_dir/Collection_TotalRecs.xml')