class TestEADClass(unittest.TestCase):
    """Tests for the EAD wrapper: filename, tree parsing, and pretty-printed output.

    Every test starts from the ``ead_messy.xml`` fixture. Tests that call
    ``prettyprint`` write into the fixture ``output`` directory and register a
    cleanup so the written file is removed whether the test passes or fails.
    """

    # Paths are built with os.path.join so the fixtures resolve on any
    # platform, not only where backslash is the path separator.
    FIXTURE_DIR = "test_files"
    MESSY_FILENAME = "ead_messy.xml"

    def setUp(self):
        """Load the messy fixture and record where output files are written."""
        self.messy_path = os.path.join(self.FIXTURE_DIR, self.MESSY_FILENAME)
        self.ead = EAD(filepath=self.messy_path)
        self.test_output_dir = os.path.join(self.FIXTURE_DIR, "output")

    def _output_path(self):
        """Return the path the tests read the pretty-printed file back from."""
        return os.path.join(self.test_output_dir, self.ead.filename)

    def _remove_output(self):
        """Delete the pretty-printed file, tolerating it never being written."""
        output_path = self._output_path()
        if os.path.exists(output_path):
            os.remove(output_path)

    @staticmethod
    def _read(filepath):
        """Return the full text content of *filepath*."""
        with open(filepath) as f:
            return f.read()

    def test_filename(self):
        """The EAD exposes the bare file name, without its directory."""
        self.assertEqual(self.ead.filename, self.MESSY_FILENAME)

    def test_etree_creation(self):
        """The EAD's tree serializes identically to a direct parse of the file."""
        self.assertEqual(
            etree.tostring(self.ead.tree),
            etree.tostring(etree.parse(self.messy_path)),
        )

    def test_pretty_printing(self):
        """Pretty-printing the messy fixture reproduces ``ead_pretty.xml``."""
        # Registered before writing so the file is removed even if the
        # comparison below fails.
        self.addCleanup(self._remove_output)
        self.ead.prettyprint(output_dir=self.test_output_dir)

        ead_prettyprinted = self._read(self._output_path())
        ideal_output = self._read(os.path.join(self.FIXTURE_DIR, "ead_pretty.xml"))

        self.assertEqual(ead_prettyprinted, ideal_output)

    def test_edit_and_output(self):
        """A node appended to the tree appears in the pretty-printed output."""
        new_node = etree.Element("new_node")
        new_node.text = "new text"

        self.addCleanup(self._remove_output)
        self.ead.tree.xpath("/ead")[0].append(new_node)
        self.ead.prettyprint(output_dir=self.test_output_dir)

        ead_prettyprinted = self._read(self._output_path())
        ideal_output = self._read(os.path.join(self.FIXTURE_DIR, "ead_appended.xml"))

        self.assertEqual(ead_prettyprinted, ideal_output)
def main(extent_list, input_dir, output_dir):
    """Replace each listed extent's physdesc with a rebuilt one and write the edited EADs.

    For every ``(filename, xpath, longform_extent_statement)`` entry the
    statement is split with ``split_into_extents`` and
    ``split_into_aspace_components``, a replacement physdesc is built with
    ``etree_editor.make_aspace_formatted_physdesc``, and the original physdesc
    (the parent of the node at ``xpath``) is removed. Replacements are queued
    per file and attached with ``add_new_physdescs_to_tree`` just before that
    file is written to ``output_dir``.

    A file is written whenever the filename changes between consecutive
    entries, so entries are presumably grouped by filename (verify against the
    caller). A filename that reappears after being edited is re-read from
    ``output_dir`` rather than ``input_dir``.

    Side effects in the current working directory:
      * ``extents_with_errors.csv`` -- one appended row per entry that raised
        ``IndexError`` or ``ValueError``.
      * ``all_types.csv`` -- overwritten with each cleaned extent type and its
        count, most frequent first.

    Args:
        extent_list: iterable of ``(filename, xpath, longform_extent_statement)``.
        input_dir: directory holding the original EAD files.
        output_dir: directory edited EAD files are written to.
    """
    edited_filenames = set()
    type_counts = defaultdict(int)

    # Make sure the errors csv exists even if no entry fails.
    # NOTE(review): binary mode is what the Python 2 csv module expects; under
    # Python 3 csv.writer needs a text-mode file opened with newline="".
    with open("extents_with_errors.csv", mode="ab"):
        pass

    ead = None
    previous_filename = ""
    new_physdescs = defaultdict(list)

    for filename, xpath, longform_extent_statement in tqdm(extent_list, desc="splitting, parsing, and writing extents...", leave=True):

        # Only load a new tree (and write out the old one) when the filename changes.
        if ead is None or filename != previous_filename:
            if ead is not None:
                add_new_physdescs_to_tree(ead.tree, new_physdescs)
                new_physdescs = defaultdict(list)
                ead.prettyprint(output_dir)

            # If this file has already been edited, its current state lives in
            # the output directory, not the input directory.
            source_dir = output_dir if filename in edited_filenames else input_dir
            ead = EAD(path.join(source_dir, filename))
            previous_filename = filename

        try:
            # Key under which the replacement physdesc is queued for this file.
            parent_of_physdesc_xpath = etree_editor.get_parent_node(ead.tree, xpath)

            physdesc = ead.tree.xpath(xpath)[0].getparent()
            portion = physdesc.get("altrender", "")

            # Split the original extent text into its component parts.
            highlevel_extents = split_into_extents(longform_extent_statement)
            is_multiple_extents = len(highlevel_extents) > 1
            aspace_components = [
                split_into_aspace_components(extent, physdesc, portion, is_multiple_extents)
                for extent in highlevel_extents
            ]

            new_physdesc = etree_editor.make_aspace_formatted_physdesc(aspace_components)

            # Nothing was produced: leave the original physdesc in place.
            if len(new_physdesc) == 0:
                continue

            new_physdescs[parent_of_physdesc_xpath].append(new_physdesc)

            # Remove the old physdesc now that its replacement is queued.
            physdesc.getparent().remove(physdesc)

            edited_filenames.add(filename)

            # Temporary: tally every extent type seen across the collection,
            # ignoring surrounding spaces and any leading digits/punctuation.
            for aspace_component in aspace_components:
                type_ = aspace_component.type_.strip(" ").lstrip("1234567890.- ")
                type_counts[type_] += 1

        except (IndexError, ValueError) as e:
            # Record the failing entry and move on to the next one.
            with open("extents_with_errors.csv", mode="ab") as f:
                csv.writer(f).writerow([filename, xpath, longform_extent_statement, str(e)])
            continue

    # The loop only writes a file when the filename changes, so the last file
    # processed still has queued physdescs and has not been written yet.
    if ead is not None:
        add_new_physdescs_to_tree(ead.tree, new_physdescs)
        ead.prettyprint(output_dir)

    # Clean up the changes.
    prettify_xml_in_directory(input_dir=output_dir, output_dir=output_dir, eads=tuple(edited_filenames))

    with open("all_types.csv", mode="wb") as f:
        writer = csv.writer(f)
        rows = sorted(type_counts.items(), key=lambda item: -item[1])
        writer.writerows(rows)
 def setUp(self):
     """Create the EAD under test from the ead_messy.xml fixture and set the output directory."""
     # NOTE(review): the enclosing class is not visible here and the indent is
     # a single space -- confirm where this method belongs.
     # NOTE(review): both paths use backslash separators, which only act as
     # directory separators on Windows.
     self.ead = EAD(filepath=r"test_files\ead_messy.xml")
     self.test_output_dir = r"test_files\output"