Exemple #1
0
def test_try_to_get_a_hidden_pdf_at_root_of_cpd(mock_findcpd, mock_findsibl,
                                                mock_tryhidden,
                                                mock_writehidden):
    """Exercise ``try_to_get_a_hidden_pdf_at_root_of_cpd`` in two scenarios.

    1. The (pointer, filetype) pair reported by ``mock_findcpd`` matches one
       of the sibling file names: neither the fetch nor the write helper
       may be called.
    2. The pair matches no sibling: the fetch helper is called with the pair
       and its result is handed to the write helper.
    """

    def arrange(cpd_lookup_result):
        # Fresh scraper wired to the mocked collaborators; a new sibling
        # list is created on every call so scenarios never share state.
        scraper = scrape_cDM.ScrapeAlias('_', '_')
        scraper.alias_dir = 'fake/filepath'
        scraper.find_sibling_files = mock_findsibl
        scraper.try_getting_hidden_pdf = mock_tryhidden
        scraper.write_hidden_pdf_if_a_binary = mock_writehidden
        mock_findcpd.return_value = cpd_lookup_result
        mock_findsibl.return_value = ['imag1.img', 'imag2.img', 'imag3.img']
        return scraper

    def assert_lookups_performed():
        # Both scenarios must consult the Cpd lookup and the sibling listing.
        mock_findcpd.assert_called_with('fake/filepath/Cpd', 'fakefile')
        mock_findsibl.assert_called_with('fakefile')

    # Scenario 1: binary already on disk ('imag1.img' is among the siblings).
    on_disk = arrange(('imag1', 'img'))
    on_disk.try_to_get_a_hidden_pdf_at_root_of_cpd('fakefile')
    assert_lookups_performed()
    assert not mock_tryhidden.called
    assert not mock_writehidden.called

    # Scenario 2: binary not on disk (no sibling is named 'imag1.other').
    missing = arrange(('imag1', 'other'))
    mock_tryhidden.return_value = 'imag_binary'
    missing.try_to_get_a_hidden_pdf_at_root_of_cpd('fakefile')
    assert_lookups_performed()
    mock_tryhidden.assert_called_with('imag1', 'other')
    mock_writehidden.assert_called_with('imag_binary', 'fake/filepath/Cpd',
                                        'imag1', 'other')
Exemple #2
0
def test_write_hidden_pdf_if_a_binary__xml_received_instead_of_binary_failure(
        mock_API, binary_decodes_fixture):
    """``write_hidden_pdf_if_a_binary`` must return False for the fixture.

    The payload comes from ``binary_decodes_fixture``; judging by the test
    name it is XML rather than a real binary (confirm against the fixture).
    """
    # Sample of such an XML payload, kept for reference:
    # <?xml version="1.0" encoding="utf-8"?><cpd><type>Document-PDF</type><page><pagetitle>Page 1</pagetitle><pagefile>3605.pdfpage</pagefile><pageptr>3604</pageptr></page></cpd>
    scraper = scrape_cDM.ScrapeAlias('_', '_')
    outcome = scraper.write_hidden_pdf_if_a_binary(
        binary_decodes_fixture,
        'imag_filepath',
        'imag_pointer',
        'imag_filetype',
    )
    assert outcome is False
Exemple #3
0
def test_find_sibling_files():
    """``find_sibling_files`` returns every file listed alongside the query.

    Each ``tree_snapshot`` row ends with a list of file names; looking up
    any one of them must yield the whole list (order is not checked).
    """
    scraper = scrape_cDM.ScrapeAlias('_', '_')
    # NOTE(review): the first row's leading element is a bare string while
    # the other rows wrap it in a list -- confirm which shape is intended.
    scraper.tree_snapshot = [
        ['imag_a', ['imagd_1'], ['a1', 'a2', 'a3']],
        [['imag_b'], [], ['b1', 'b2']],
        [['imag_c'], [], ['c1', 'c2', 'c3', 'c4']],
    ]
    expectations = (
        ('a1', {'a1', 'a2', 'a3'}),
        ('b2', {'b1', 'b2'}),
        ('c3', {'c1', 'c2', 'c3', 'c4'}),
    )
    for probe, expected_siblings in expectations:
        found = scraper.find_sibling_files(probe)
        assert set(found) == expected_siblings
Exemple #4
0
def test_try_getting_hidden_pdf(mock_API):
    """Check both outcomes of ``try_getting_hidden_pdf``.

    * When ``retrieve_binary`` returns a value, that value is passed back.
    * When ``retrieve_binary`` raises ``urllib.error.HTTPError``, the method
      returns ``False``.
    """
    # ``import urllib`` alone does not guarantee the ``error`` submodule is
    # loaded, so import it explicitly.
    import urllib.error

    scrapealias = scrape_cDM.ScrapeAlias('imag_path', 'imag_alias')

    # Success path: the retrieved binary is returned unchanged.
    mock_API.retrieve_binary.return_value = 'actual_imag_binary'
    assert scrapealias.try_getting_hidden_pdf(
        'imag_pointer', 'imag_filetype') == 'actual_imag_binary'

    # Failure path: build a well-formed HTTPError. Keyword arguments keep
    # each value in its proper slot, and ``fp=None`` avoids wrapping a
    # non-file object that would fail to close when the exception is
    # garbage-collected.
    mock_API.retrieve_binary.side_effect = urllib.error.HTTPError(
        url='imag', code=404, msg='imag_exception occurredd',
        hdrs=None, fp=None)
    assert scrapealias.try_getting_hidden_pdf('imag_pointer',
                                              'imag_filetype') is False
Exemple #5
0
def test_calculate_chunks(mock_count_root_objects):
    """``calculate_chunks`` yields the expected count for several totals.

    Each case is (total root objects, chunk size, expected chunk count).
    The total is supplied through the mocked ``count_root_objects``, which
    must have been called with no arguments.
    """
    scraper = scrape_cDM.ScrapeAlias('_', '_')
    cases = [
        (899, 100, 9),
        (900, 100, 10),
        (999, 1000, 1),
        (1, 1, 2),
    ]
    for case in cases:
        record_total, size, expected_chunks = case
        scraper.count_root_objects.return_value = record_total
        computed = scraper.calculate_chunks(size)
        assert computed == expected_chunks
        mock_count_root_objects.assert_called_with()
Exemple #6
0
def test_main_loop(mock_docpd, mock_doroot, mock_docoll):
    """``main`` must invoke all three processing stages with no arguments."""
    scraper = scrape_cDM.ScrapeAlias('_', '_')
    stages = (mock_docoll, mock_doroot, mock_docpd)
    (scraper.do_collection_level_metadata,
     scraper.do_root_level_objects,
     scraper.do_compound_objects) = stages

    # Nothing may have run before main() is called.
    assert not any(stage.called for stage in stages)

    scraper.main()

    for stage in stages:
        stage.assert_called_with()
Exemple #7
0
def test_parse_children_of_cpd(mock_arepointers, mock_ETparse,
                               ETparse_fixture):
    """Check ``parse_children_of_cpd`` for both pdfpage verdicts.

    * ``are_child_pointers_pdfpages`` returning True -> method returns False.
    * ``are_child_pointers_pdfpages`` returning False -> method returns
      ``'imag_children_pointers_list'`` (presumably produced by
      ``ETparse_fixture`` -- confirm against the fixture).
    """
    scrapealias = scrape_cDM.ScrapeAlias('_', '_')
    scrapealias.alias_dir = 'imag_dir'
    scrapealias.are_child_pointers_pdfpages = mock_arepointers
    mock_ETparse.return_value = ETparse_fixture
    mock_ETparse.findall = ETparse_fixture

    # ``ET.parse`` is a shared module attribute: remember the current value
    # and restore it afterwards so the mock cannot leak into other tests.
    original_parse = scrape_cDM.ET.parse
    scrape_cDM.ET.parse = mock_ETparse
    try:
        mock_arepointers.return_value = True
        assert scrapealias.parse_children_of_cpd('imag_parent') is False
        mock_ETparse.assert_called_with('imag_dir/Cpd/imag_parent_cpd.xml')
        mock_ETparse.findall.assert_called_with('imag_elem',
                                                'imag_parent_cpd.xml')

        mock_arepointers.return_value = False
        assert scrapealias.parse_children_of_cpd(
            'imag_parent') == 'imag_children_pointers_list'
    finally:
        scrape_cDM.ET.parse = original_parse
Exemple #8
0
def test_are_child_pointers_pdfpages(mock_try_to_get, mock_has_pdfpage):
    """Check ``are_child_pointers_pdfpages`` against the pdfpage detector.

    * Detector says False -> result is False and no hidden-pdf attempt.
    * Detector says True  -> result is True and a hidden-pdf attempt is made
      with the file name, whatever that attempt returns.
    """
    scraper = scrape_cDM.ScrapeAlias('_', '_')
    scraper.try_to_get_a_hidden_pdf_at_root_of_cpd = mock_try_to_get
    # ``mock_has_pdfpage`` is assumed to be patched in by its fixture; an
    # explicit assignment to scrape_cDM.has_pdfpage_elems was disabled here.

    # No pdfpage elements: short answer, no download attempt.
    mock_has_pdfpage.return_value = False
    verdict = scraper.are_child_pointers_pdfpages('imag_list',
                                                  'imag_filename')
    assert verdict is False
    mock_has_pdfpage.assert_called_with('imag_list')
    assert not mock_try_to_get.called

    # Pdfpage elements present: True regardless of the download outcome.
    for download_outcome in (False, True):
        mock_has_pdfpage.return_value = True
        mock_try_to_get.return_value = download_outcome
        verdict = scraper.are_child_pointers_pdfpages('imag_list',
                                                      'imag_filename')
        assert verdict is True
        mock_has_pdfpage.assert_called_with('imag_list')
        mock_try_to_get.assert_called_with('imag_filename')
Exemple #9
0
def test_do_collection_level_metadata(mock_API, mock_os):
    """Check ``do_collection_level_metadata`` with and without cached files.

    * When the directory listing already holds all four collection files,
      none of the retrieve calls may happen.
    * When the listing is empty, every retrieve call is made with the alias
      and the results are written: three XML writes and one JSON write.
    """
    mock_os.makedirs.return_value = True
    mock_API.retrieve_collection_total_recs.return_value = 'total_recs'
    mock_API.retrieve_collection_metadata.return_value = 'coll_metadata'
    mock_API.retrieve_collection_fields_json.return_value = 'fields_json'
    # Attribute name must match the one asserted below; a misspelling would
    # silently configure an unrelated auto-created mock attribute.
    mock_API.retrieve_collection_fields_xml.return_value = 'fields_xml'
    scrapealias = scrape_cDM.ScrapeAlias('imag_path', 'imag_alias')
    scrapealias.alias_dir = 'imag_filepath'

    # Scenario 1: everything is already on disk -> nothing is retrieved.
    mock_os.listdir.return_value = ('Collection_TotalRecs.xml',
                                    'Collection_Metadata.xml',
                                    'Collection_Fields.json',
                                    'Collection_Fields.xml')
    scrapealias.do_collection_level_metadata()
    assert not mock_API.retrieve_collection_total_recs.called
    assert not mock_API.retrieve_collection_metadata.called
    assert not mock_API.retrieve_collection_fields_json.called
    assert not mock_API.retrieve_collection_fields_xml.called

    # Scenario 2: empty directory listing -> everything is retrieved.
    mock_os.listdir.return_value = []
    scrapealias.do_collection_level_metadata()
    mock_API.retrieve_collection_total_recs.assert_called_with('imag_alias')
    mock_API.retrieve_collection_metadata.assert_called_with('imag_alias')
    mock_API.retrieve_collection_fields_json.assert_called_with('imag_alias')
    mock_API.retrieve_collection_fields_xml.assert_called_with('imag_alias')
    # Calling the mocks again yields the same configured return values,
    # which are the payloads expected in the write calls.
    fake_json_fields_call = mock_API.retrieve_collection_fields_json(
        'imag_alias')
    fake_xml_fields_call = mock_API.retrieve_collection_fields_xml(
        'imag_alias')
    assert mock_API.write_xml_to_file.call_count == 3
    assert mock_API.write_json_to_file.call_count == 1
    mock_API.write_json_to_file.assert_called_with(fake_json_fields_call,
                                                   'imag_filepath',
                                                   'Collection_Fields')
    mock_API.write_xml_to_file.assert_called_with(fake_xml_fields_call,
                                                  'imag_filepath',
                                                  'Collection_Fields')
Exemple #10
0
def test_count_root_objects(mock_ET, total_recs_etree_fixture):
    """``count_root_objects`` parses Collection_TotalRecs.xml and returns 20.

    The parsed tree is supplied by ``total_recs_etree_fixture`` through the
    mocked ``ET.parse``.
    """
    mock_ET.parse.return_value = total_recs_etree_fixture
    scraper = scrape_cDM.ScrapeAlias('_', '_')
    scraper.alias_dir = 'imag_alias_dir'

    counted = scraper.count_root_objects()

    assert counted == 20
    mock_ET.parse.assert_called_with(
        'imag_alias_dir/Collection_TotalRecs.xml')