def test_id(self):
    '''The ID column supplies the record's main ID, which names the output
    file (<ID>.mods.xml). Each header spelling listed below is exercised
    in its own sub-test.'''
    header_variants = (
        'ID,<mods:note>\n1,asdf\n',
        'MODS ID,<mods:note>\n1,asdf\n',
        '<mods:mods id="">,<mods:note>\n1,asdf\n',
    )
    for sheet_text in header_variants:
        # One fresh output directory per variant so the file count is exact.
        with self.subTest(csv_info=sheet_text), \
                tempfile.TemporaryDirectory() as out_dir:
            sheet = io.BytesIO(sheet_text.encode('utf8'))
            process(spreadsheet=sheet, xml_files_dir=out_dir)
            expected_file = os.path.join(out_dir, '1.mods.xml')
            self.assertTrue(os.path.exists(expected_file))
            self.assertEqual(len(os.listdir(out_dir)), 1)
def test_process_spreadsheet_file_obj(self):
    '''process() is handed an already-open binary file object, first for
    the .xls fixture and then for the .csv fixture; each run must produce
    test1.mods.xml.'''
    for fixture_name in ('data.xls', 'data.csv'):
        fixture_path = os.path.join('test_files', fixture_name)
        with tempfile.TemporaryDirectory() as out_dir:
            with open(fixture_path, 'rb') as handle:
                process(spreadsheet=handle, xml_files_dir=out_dir)
            produced = os.path.join(out_dir, 'test1.mods.xml')
            self.assertTrue(os.path.exists(produced))
if __name__ == '__main__':
    # Command-line entry point: parse the options, run process() once on the
    # given spreadsheet, then exit.
    XML_FILES_DIR = "xml_files"

    parser = ArgumentParser()
    parser.add_argument('file_name')
    parser.add_argument(
        '-t', '--type', dest='type', default='parent',
        help='type of records (parent or child, default is parent)')
    parser.add_argument(
        '--force-dates', action='store_true', dest='force_dates',
        help='force date conversion even if ambiguous')
    parser.add_argument(
        '--copy-parent-to-children', action='store_true',
        dest='copy_parent_to_children',
        help='copy parent data into children')
    # type=int lets argparse reject non-numeric values with a usage error
    # instead of an uncaught ValueError traceback from a later int() call.
    parser.add_argument(
        '-s', '--sheet', dest='sheet', type=int, default=1,
        help='specify the sheet number (starting at 1) in an Excel spreadsheet')
    parser.add_argument(
        '-r', '--ctrl_row', dest='row', type=int, default=2,
        help='specify the control row number (starting at 1) in an Excel spreadsheet')
    parser.add_argument(
        '-i', '--input-encoding', dest='in_enc', default='utf-8',
        help='specify the input encoding for CSV files (default is UTF-8)')
    args = parser.parse_args()

    # NOTE(review): the tests visible in this file call
    # process(spreadsheet=...), while this call passes file_name= --
    # confirm the keyword names against process()'s signature.
    process(
        file_name=args.file_name,
        xml_files_dir=XML_FILES_DIR,
        sheet=args.sheet,
        control_row=args.row,
        force_dates=args.force_dates,
        object_type=args.type,
        input_encoding=args.in_enc,
        copy_parent_to_children=args.copy_parent_to_children,
    )
    sys.exit()
def test_missing_id(self):
    '''A spreadsheet whose header row has MODS columns but no ID column
    must make process() raise ControlRowError.'''
    sheet_text = '<mods:abstract>,<mods:note>\nasdf,jkl;\n'
    sheet = io.BytesIO(sheet_text.encode('utf8'))
    with tempfile.TemporaryDirectory() as out_dir, \
            self.assertRaises(ControlRowError):
        process(spreadsheet=sheet, xml_files_dir=out_dir)
def test_no_ctrl_row(self):
    '''A spreadsheet with only plain-text headers and data rows (no control
    row) must make process() raise ControlRowError.'''
    sheet_text = 'MODS,Note\n1,asdf\n2,jkl;'
    sheet = io.BytesIO(sheet_text.encode('utf8'))
    with tempfile.TemporaryDirectory() as out_dir, \
            self.assertRaises(ControlRowError):
        process(spreadsheet=sheet, xml_files_dir=out_dir)
def test_find_ctrl_row_second_row(self):
    '''With no control_row argument passed, a control row on the second
    line (below a plain-text header line) is still used: exactly one file,
    1.mods.xml, is written.'''
    sheet_text = 'MODS,Note\nmods id,<mods:note>\n1,asdf\n'
    with tempfile.TemporaryDirectory() as out_dir:
        sheet = io.BytesIO(sheet_text.encode('utf8'))
        process(spreadsheet=sheet, xml_files_dir=out_dir)
        expected_file = os.path.join(out_dir, '1.mods.xml')
        self.assertTrue(os.path.exists(expected_file))
        self.assertEqual(len(os.listdir(out_dir)), 1)
def test_process_duplicate_ids(self):
    '''Two data rows sharing the same ID must make process() raise
    DataError.'''
    sheet_text = 'ID,<mods:note>\n1,asdf\n1,jkl\n'
    sheet = io.BytesIO(sheet_text.encode('utf8'))
    with tempfile.TemporaryDirectory() as out_dir, \
            self.assertRaises(DataError):
        process(spreadsheet=sheet, xml_files_dir=out_dir)
def test_process_record_missing_data(self):
    '''A spreadsheet containing a record that has an ID but an empty data
    cell (the row for ID 2) must make process() raise DataError.'''
    sheet_text = 'ID,<mods:note>\n1,asdf\n2,\n3,jkl'
    sheet = io.BytesIO(sheet_text.encode('utf8'))
    with tempfile.TemporaryDirectory() as out_dir, \
            self.assertRaises(DataError):
        process(spreadsheet=sheet, xml_files_dir=out_dir)
def test_process_minimal_spreadsheet(self):
    '''The smallest useful sheet, a control row on line 1 (passed explicitly
    as control_row=1) plus one record, produces 1.mods.xml.'''
    sheet_text = 'mods id,<mods:note>\n1,asdf\n'
    with tempfile.TemporaryDirectory() as out_dir:
        sheet = io.BytesIO(sheet_text.encode('utf8'))
        process(spreadsheet=sheet, xml_files_dir=out_dir, control_row=1)
        expected_file = os.path.join(out_dir, '1.mods.xml')
        self.assertTrue(os.path.exists(expected_file))
def test_process_no_xml_files_dir(self):
    '''process() is pointed at an output directory that this test never
    creates, and is expected to write test1.mods.xml into it. The
    spreadsheet is passed as a path string here, not a file object.'''
    source_path = os.path.join('test_files', 'data.xls')
    with tempfile.TemporaryDirectory() as scratch:
        # Deliberately not created beforehand.
        target_dir = os.path.join(scratch, 'xml_files')
        process(spreadsheet=source_path, xml_files_dir=target_dir)
        produced = os.path.join(target_dir, 'test1.mods.xml')
        self.assertTrue(os.path.exists(produced))
dest='sheet', default=1, help='specify the sheet number (starting at 1) in an Excel spreadsheet' ) parser.add_argument( '-r', '--ctrl_row', action='store', dest='row', default=2, help= 'specify the control row number (starting at 1) in an Excel spreadsheet' ) parser.add_argument( '-i', '--input-encoding', action='store', dest='in_enc', default='utf-8', help='specify the input encoding for CSV files (default is UTF-8)') args = parser.parse_args() process(file_name=args.file_name, xml_files_dir=XML_FILES_DIR, sheet=int(args.sheet), control_row=int(args.row), force_dates=args.force_dates, object_type=args.type, input_encoding=args.in_enc, copy_parent_to_children=args.copy_parent_to_children) sys.exit()