def main():
    output = []
    longest_statement = 0
    filename = "../input/all_extents.csv"

    with open(filename, mode="r") as f:
        reader = csv.reader(f)
        reader.next()  # skip the header row

        for extent_row in tqdm(list(reader)):
            ead_filename, extent_xpath, extent_statement = extent_row
            extents_split = split_into_extents(extent_statement)

            for extent in extents_split:
                extent_row.append(extent)
            output.append(extent_row)

            if len(extents_split) > longest_statement:
                longest_statement = len(extents_split)

    with open("../extent_split.csv", mode="wb") as f:
        writer = csv.writer(f)

        header = ["EAD filename", "XPath to extent", "Original extent text"]
        for i in range(longest_statement):
            header.append("split extent segment {}".format(i + 1))
        writer.writerow(header)

        # If we are going to be doing any EAD modifications based on this list, we'll need to iterate over it in reverse
        # to keep the xpaths valid as we make multiple changes to the same file. It's easier to reverse it here than
        # in the transformation code
        output = reversed(output)

        for row in output:
            if len(row) < longest_statement:
                diff = longest_statement - len(row)
                row = add_blank_elements(row, diff)
            writer.writerow(row)
 def check_output_equality(self, original_text, target_list):
     split_extent = split_into_extents(original_text)
     self.assertEqual(split_extent, target_list)