def test_xls(self):
    # Reads the .xls fixture twice: the default sheet is expected to yield
    # two records, and sheet 2 a single record.
    xls_path = os.path.join('test_files', 'data.xls')
    records = DataHandler(xls_path).get_xml_records()
    self.assertEqual(len(records), 2)
    first = records[0]

    # Both the XML path and the value of the first field must be unicode.
    for key in ('xml_path', 'data'):
        self.assertTrue(isinstance(first.field_data()[0][key], unicode))

    # (field index, key, expected value) for the first record.
    expected_fields = (
        (0, 'xml_path', u'<mods:identifier type="local" displayLabel="Originăl noé.">'),
        (0, 'data', u'123'),
        (2, 'xml_path', u'<mods:titleInfo><mods:title>'),
        (2, 'data', u'Test 1'),
    )
    for index, key, expected in expected_fields:
        self.assertEqual(first.field_data()[index][key], expected)

    self.assertEqual(first.group_id, u'test1')
    self.assertEqual(records[1].group_id, u'test2')
    self.assertEqual(first.xml_id, u'test1')
    self.assertEqual(records[1].xml_id, u'test2')

    # test that process_text_date is working right
    self.assertEqual(first.field_data()[4]['data'], u'2005-10-21')

    # test that we can get the second sheet correctly
    sheet_two = DataHandler(xls_path, sheet=2).get_xml_records()
    self.assertEqual(len(sheet_two), 1)
    self.assertEqual(sheet_two[0].xml_id, u'mods0001')
    self.assertEqual(sheet_two[0].field_data()[5]['data'], u'2008-10-21')
def test_csv_small(self):
    # The small CSV fixture is expected to produce exactly one record whose
    # fields are unicode and match the values asserted below.
    dh = DataHandler(os.path.join('test_files', 'data-small.csv'))
    mods_records = dh.get_xml_records()

    # Check the count before indexing so an empty result fails with a clear
    # assertion message instead of an IndexError.
    self.assertEqual(len(mods_records), 1)
    record = mods_records[0]
    self.assertEqual(record.group_id, u'test1')

    # assertIsInstance reports the offending type when it fails.
    self.assertIsInstance(record.field_data()[0]['xml_path'], unicode)
    self.assertIsInstance(record.field_data()[0]['data'], unicode)

    self.assertEqual(record.field_data()[0]['xml_path'], u'<mods:identifier type="local" displayLabel="Originăl noé.">')
    self.assertEqual(record.field_data()[0]['data'], u'123')
    self.assertEqual(record.field_data()[2]['xml_path'], u'<mods:titleInfo><mods:title>')
    self.assertEqual(record.field_data()[2]['data'], u'Test 1')
    self.assertEqual(record.field_data()[4]['data'], u'2005-10-21')
def test_xlsx(self):
    # Reads the .xlsx fixture with obj_type='child'; two records sharing the
    # group id u'test1' are expected, with xml ids suffixed _1 and _2.
    dh = DataHandler(os.path.join('test_files', 'data.xlsx'), obj_type='child')
    mods_records = dh.get_xml_records()

    # Check the count before indexing so a short result fails with a clear
    # assertion message instead of an IndexError.
    self.assertEqual(len(mods_records), 2)
    first, second = mods_records[0], mods_records[1]

    # assertIsInstance reports the offending type when it fails.
    self.assertIsInstance(first.field_data()[0]['xml_path'], unicode)
    self.assertIsInstance(first.field_data()[0]['data'], unicode)

    self.assertEqual(first.field_data()[0]['xml_path'], u'<mods:identifier type="local" displayLabel="Originăl noé.">')
    self.assertEqual(first.field_data()[0]['data'], u'123')
    self.assertEqual(first.field_data()[2]['xml_path'], u'<mods:titleInfo><mods:title>')
    self.assertEqual(first.field_data()[2]['data'], u'Test 1')
    self.assertEqual(first.field_data()[4]['data'], u'2005-10-21')

    self.assertEqual(first.group_id, u'test1')
    self.assertEqual(first.xml_id, u'test1_1')  # _1 because it's a child
    self.assertEqual(second.group_id, u'test1')
    self.assertEqual(second.xml_id, u'test1_2')
def test_csv_dwc(self):
    # Reads the Darwin Core CSV fixture and checks the xml_path/data pairs
    # of selected fields on the first three records.
    dh = DataHandler(os.path.join('test_files', 'data_dwc.csv'))
    xml_records = dh.get_xml_records()

    # Records 0..2 are indexed below; fail with a clear message rather than
    # an IndexError if the fixture yields fewer.
    self.assertGreaterEqual(len(xml_records), 3)

    first = xml_records[0]
    self.assertEqual(first.group_id, u'test1')
    self.assertEqual(first.xml_id, u'test1')

    # assertIsInstance reports the offending type when it fails.
    self.assertIsInstance(first.field_data()[0]['xml_path'], unicode)
    self.assertIsInstance(first.field_data()[0]['data'], unicode)

    # (record index, field index, expected xml_path, expected data)
    expected_fields = (
        (0, 0, u'<dwc:higherClassification>', u'higher classification'),
        (0, 1, u'<dwc:genus>', u'Genus'),
        (0, 2, u'<dwc:specificEpithet>', u'species'),
        (0, 3, u'<dwc:infraspecificEpithet>', u'variety'),
        (0, 4, u'<dwc:taxonRank>', u'variety'),
        (0, 5, u'<dwc:scientificNameAuthorship>', u'Variety Author'),
        (0, 6, u'<dwc:acceptedNameUsage>', u'Genus species var. variety Variety Author'),
        (1, 2, u'<dwc:specificEpithet>', u'species2'),
        (1, 3, u'<dwc:infraspecificEpithet>', u'subspecies'),
        (1, 4, u'<dwc:taxonRank>', u'subspecies'),
        (1, 6, u'<dwc:acceptedNameUsage>', u'Genus2 species2 subsp. subspecies Subspecies Author'),
        (2, 2, u'<dwc:specificEpithet>', u'species3'),
        (2, 3, u'<dwc:scientificNameAuthorship>', u'Species3 Author'),
        (2, 4, u'<dwc:acceptedNameUsage>', u'Genus3 species3 Species3 Author'),
    )
    for record_index, field_index, xml_path, data in expected_fields:
        field = xml_records[record_index].field_data()[field_index]
        self.assertEqual(field['xml_path'], xml_path)
        self.assertEqual(field['data'], data)