def test_file_split_by_data_blocks(self, mocker):
        """Check that the file contents are split into one DataBlock per header.

        The three blocks use differently-cased ``data_`` prefixes in their
        header lines (``data_``, ``DATA_`` and ``dATa_``).
        """
        first_block = [
            "data_block_header",
            "_data_name_A data_value_A",
            "_data_name_B data_value_B",
        ]
        second_block = [
            "DATA_block_2",
            "loop_",
            "_loop_data_name_A",
            "loop_data_value_A1",
            "loop_data_value_A2",
        ]
        third_block = [
            "dATa_block_the_third",
            "_data_name_C data_value_C",
        ]
        all_blocks = [first_block, second_block, third_block]

        # the mocked file is simply every block's lines, one after another
        file_text = "\n".join(line for lines in all_blocks for line in lines)
        mocker.patch("builtins.open", mock.mock_open(read_data=file_text))

        # expected output - the first line of each block is its header and
        # the remaining lines are the raw data held by the DataBlock object
        expected = [
            DataBlock(lines[0], "\n".join(lines[1:])) for lines in all_blocks
        ]

        parser = CIFParser("/some_directory/some_file.cif")
        parser._extract_data_blocks()
        assert parser.data_blocks == expected
    def test_can_load_crystal_data_from_multi_data_block_cif(self):
        """Parse a CIF holding 20 data blocks and spot-check blocks 0 and 11."""
        parser = CIFParser(
            "tests/functional/static/valid_cifs/multi_data_block.cif"
        )
        parser.parse()

        # basic checks that the correct number of blocks / items were caught
        blocks = parser.data_blocks
        assert len(blocks) == 20
        assert blocks[0].header == "data_CSD_CIF_ACAGUG"
        assert blocks[11].header == "data_CSD_CIF_AHUKOD"
        items_a = blocks[0].data_items
        items_b = blocks[11].data_items
        assert len(items_a) == 39
        assert len(items_b) == 41

        # check that looped data items were collected into lists
        assert len(items_a["atom_site_label"]) == 119
        assert len(items_b["atom_site_label"]) == 69
        assert items_a["atom_type_radius_bond"] == [
            "0.68", "0.23", "1.35", "0.68", "1.02"
        ]
        assert items_b["atom_type_radius_bond"] == [
            "0.68", "0.23", "1.21", "0.64", "1.40", "1.02"
        ]

        # check that semicolon text fields were assigned correctly
        assert items_a["refine_special_details"] == (
            "One of the water molecules is disordered over two sites."
        )
        assert items_b["chemical_name_systematic"] == (
            "tris(bis(Ethylenedithio)tetrathiafulvalene) \n"
            "2,5-difluoro-1,4-bis(iodoethynyl)benzene bromide"
        )

        # check a few inline data items
        inline_checks = [
            (items_a, "journal_year", "2001"),
            (items_a, "exptl_crystal_colour", "dark brown"),
            (items_b, "journal_name_full", "J.Mater.Chem. "),
            (items_b, "cell_angle_gamma", "76.35(2)"),
        ]
        for items, name, value in inline_checks:
            assert items[name] == value
 def test_parse_method_calls_in_correct_order(self):
     """Check the sequence of calls made by ``CIFParser.parse``.

     The real ``parse`` is run against a mocked parser holding a single
     mocked data block, and the calls recorded on both mocks are compared
     with the expected sequence.
     """
     parser = mock.Mock(spec=CIFParser)
     block = mock.Mock(spec=DataBlock)
     parser.data_blocks = [block]

     CIFParser.parse(parser)

     # calls expected on the parser itself
     parser_calls = [
         mock.call._strip_comments_and_blank_lines(),
         mock.call._extract_data_blocks(),
     ]
     # calls expected on each data block
     block_calls = [
         mock.call.extract_data_items(SEMICOLON_DATA_ITEM),
         mock.call.extract_data_items(INLINE_DATA_ITEM),
         mock.call.extract_loop_data_items(),
     ]

     # ordering is verified within each mock; the two recorded call lists
     # are joined end to end before the comparison
     recorded = parser.method_calls + block.method_calls
     assert recorded == parser_calls + block_calls
    def test_comments_and_blank_lines_are_stripped_out(self, mocker):
        """Check that only the two non-comment, non-blank lines are kept."""
        # lines expected to be removed from the start of the file
        leading_noise = [
            "# Here is a comment on the first line",
            "# Here is another comment. The next line is just whitespace",
            "\t\t\t\t\t",
            "",
        ]
        # lines expected to survive unchanged (including leading whitespace)
        kept_lines = [
            "_some_normal_line previous_line_was_blank",
            "  _another_normal_line starting_with_whitespace",
        ]
        # line expected to be removed from the end of the file
        trailing_noise = [
            '# Final comment ## with # extra hashes ### in ##',
        ]
        file_text = '\n'.join(leading_noise + kept_lines + trailing_noise)
        mocker.patch("builtins.open", mock.mock_open(read_data=file_text))

        parser = CIFParser("/some_directory/some_file.cif")
        parser._strip_comments_and_blank_lines()
        assert parser.raw_data == "\n".join(kept_lines)
    def test_can_load_crystal_data_from_vesta_cif(self):
        """Parse the single-block VESTA calcite CIF and spot-check its items."""
        parser = CIFParser(
            "tests/functional/static/valid_cifs/calcite_vesta.cif"
        )
        parser.parse()

        # basic checks that the correct number of blocks / items were caught
        blocks = parser.data_blocks
        assert blocks[0].header == 'data_VESTA_phase_1'
        assert len(blocks) == 1
        items = blocks[0].data_items
        assert len(items) == 25

        # check that looped data items were collected into lists
        assert len(items["symmetry_equiv_pos_as_xyz"]) == 36
        assert items["atom_site_occupancy"] == ["1.0"] * 3
        assert items["atom_site_aniso_label"] == ["Ca1", "C1", "O1"]

        # check a few inline data items
        expected_inline = {
            "cell_length_a": "4.9900(2)",
            "symmetry_space_group_name_H-M": "R -3 c",
            "symmetry_Int_Tables_number": "167",
        }
        for name, value in expected_inline.items():
            assert items[name] == value
    def test_can_load_crystal_data_from_icsd_cif(self):
        """Parse the single-block ICSD calcite CIF and spot-check its items."""
        parser = CIFParser(
            "tests/functional/static/valid_cifs/calcite_icsd.cif"
        )
        parser.parse()

        # basic checks that the correct number of blocks / items were caught
        blocks = parser.data_blocks
        assert blocks[0].header == "data_18166-ICSD"
        assert len(blocks) == 1
        items = blocks[0].data_items
        assert len(items) == 51

        # check that looped data items were collected into lists
        site_ids = items["symmetry_equiv_pos_site_id"]
        assert site_ids == list(map(str, range(1, 37)))
        assert items["atom_site_label"] == ["Ca1", "C1", "O1"]
        assert items["atom_site_aniso_U_22"] == ["0.01775(90)"]
        assert items["publ_author_name"] == [
            "Chessin, H.", "Hamilton, W.C.", "Post, B."
        ]

        # check a few inline data items
        expected_inline = {
            "cell_length_a": "4.9900(2)",
            "chemical_name_mineral": "Calcite",
            "cell_formula_units_Z": "6",
        }
        for name, value in expected_inline.items():
            assert items[name] == value