def setUp(self):
    """Create an in-memory SQLite engine, a DataLoader bound to it, and the schema."""
    engine = create_engine('sqlite:///:memory:', echo=False)
    classifications = ("TEST_CLASS1", "TEST_CLASS2", "TEST_CLASS3", "TEST_CLASS4", "TEST_CLASS7")
    self.db = engine
    self.test_classifications = classifications

    # Create the object under test, use it to create the schema
    loader = DataLoader(engine, classifications)
    self.loader = loader
    self.metadata = loader.db_metadata()
    self.metadata.create_all(engine)
def test_unexpected_disallowed_duplicate(self):
    # A loader built with allow_doc_dups=False must raise RuntimeError, with
    # the message below, when asked to insert a document that is already stored.
    expected_message = (
        "An Integrity error was detected when inserting document WO-2013127697-A1. This indicates "
        "insertion of an existing document, but duplicates have been disallowed"
    )

    # Seed the database so that WO-2013127697-A1 already exists.
    self.load(['data/biblio_single_row.json'])
    strict_loader = DataLoader(self.db, self.test_classifications, allow_doc_dups=False)

    # Only the call expected to raise lives inside the try block.
    try:
        strict_loader.load_biblio('data/biblio_typical.json')
    except RuntimeError as exc:
        # str(exc) instead of the deprecated exc.message attribute, which
        # does not exist on Python 3 exceptions.
        self.assertEqual(expected_message, str(exc))
    else:
        self.fail("Exception was expected")
class DataLoaderTests(unittest.TestCase):
    """Tests for DataLoader biblio loading, run against an in-memory SQLite database.

    NOTE(review): the helpers used below (load, load_n_query, query_all,
    check_doc_row) are not defined in this part of the class; they are
    expected to be provided elsewhere on the test case.
    """

    ###### Preparation / bootstrapping ######

    def setUp(self):
        self.db = create_engine('sqlite:///:memory:', echo=False)
        self.test_classifications = ("TEST_CLASS1", "TEST_CLASS2", "TEST_CLASS3", "TEST_CLASS4", "TEST_CLASS7")

        # Create the object under test, use it to create the schema
        self.loader = DataLoader(self.db, self.test_classifications)
        self.metadata = self.loader.db_metadata()
        self.metadata.create_all(self.db)

    def test_create_doc_loader(self):
        self.assertTrue(isinstance(self.loader, DataLoader))

    ###### Biblio loading tests ######

    def test_write_document_record(self):
        # A single-row biblio file yields the expected first document row.
        result = self.load_n_query('data/biblio_single_row.json')
        row = result.fetchone()
        self.check_doc_row(row, (1, 'WO-2013127697-A1', date(2013, 9, 6), 0, 47747634))

    def test_write_docs_many(self):
        # The typical biblio file yields 25 rows; spot-check first, second and last.
        result = self.load_n_query('data/biblio_typical.json')
        rows = result.fetchall()
        self.assertEqual(25, len(rows))
        self.check_doc_row(rows[0], (1, 'WO-2013127697-A1', date(2013, 9, 6), 0, 47747634))
        self.check_doc_row(rows[1], (2, 'WO-2013127698-A1', date(2013, 9, 6), 0, 47748611))
        self.check_doc_row(rows[24], (25, 'WO-2013189394-A2', date(2013, 12, 27), 0, 49769540))

    def test_write_docs_duplicates_handled(self):
        # Loading a file whose first document is already present must not add
        # a second row for it: the total stays at 25.
        self.load(['data/biblio_single_row.json'])
        self.load(['data/biblio_typical.json'])
        rows = self.query_all(['schembl_document']).fetchall()
        self.assertEqual(25, len(rows))
        self.check_doc_row(rows[0], (1, 'WO-2013127697-A1', date(2013, 9, 6), 0, 47747634))

    def test_unexpected_disallowed_duplicate(self):
        # A loader built with allow_doc_dups=False must raise RuntimeError, with
        # the message below, when asked to insert a document that is already stored.
        expected_message = (
            "An Integrity error was detected when inserting document WO-2013127697-A1. This indicates "
            "insertion of an existing document, but duplicates have been disallowed"
        )

        # Seed the database so that WO-2013127697-A1 already exists.
        self.load(['data/biblio_single_row.json'])
        strict_loader = DataLoader(self.db, self.test_classifications, allow_doc_dups=False)

        # Only the call expected to raise lives inside the try block.
        try:
            strict_loader.load_biblio('data/biblio_typical.json')
        except RuntimeError as exc:
            # str(exc) instead of the deprecated exc.message attribute, which
            # does not exist on Python 3 exceptions.
            self.assertEqual(expected_message, str(exc))
        else:
            self.fail("Exception was expected")
def test_replace_titles(self):
    # Covers deletion of obsolete titles, insertion of new titles, and modification of existing records
    titles_before = {
        'DE': u"VERWENDUNG EINES LATENTREAKTIVEN KLEBEFILMS ZUR VERKLEBUNG VON ELOXIERTEM ALUMINIUM MIT KUNSTSTOFF",
        'FR': u"UTILISATION D'UN FILM ADHÉSIF À RÉACTIVITÉ LATENTE POUR LE COLLAGE DE PLASTIQUE SUR DE L'ALUMINIUM ANODISÉ",
        'EN': u"USE OF A LATENTLY REACTIVE ADHESIVE FILM FOR ADHESIVE BONDING OF ELOXATED ALUMINIUM TO PLASTIC",
    }
    titles_after = {
        'DE': u"Dis ist der neu titlen",
        'ZH': u"寻设备息的传消呼方法和输",
    }

    loader = DataLoader(self.db, self.test_classifications, overwrite=True)

    # Initial load: document 1 carries its three original titles.
    loader.load_biblio('data/biblio_typical.json')
    self.verify_titles(1, titles_before)

    # Update load: document 1 is expected to carry exactly the replacement titles.
    loader.load_biblio('data/biblio_typical_update.json')
    self.verify_titles(1, titles_after)
def test_replace_document(self):
    # With overwrite=True, loading the update file changes the stored field
    # values of existing documents while the row count stays at 25.
    simple_loader = DataLoader(self.db, self.test_classifications, overwrite=True)

    simple_loader.load_biblio('data/biblio_typical.json')
    rows = self.query_all(['schembl_document']).fetchall()
    self.assertEqual(25, len(rows))
    self.check_doc_row(rows[0], (1, 'WO-2013127697-A1', date(2013, 9, 6), 0, 47747634))
    self.check_doc_row(rows[18], (19, 'WO-2013189302-A1', date(2013, 12, 27), 0, 49768126))

    simple_loader.load_biblio('data/biblio_typical_update.json')
    rows = self.query_all(['schembl_document']).fetchall()
    self.assertEqual(25, len(rows))
    self.check_doc_row(rows[0], (1, 'WO-2013127697-A1', date(2013, 9, 5), 0, 47474747))
    self.check_doc_row(rows[18], (19, 'WO-2013189302-A1', date(2013, 12, 31), 1, 47474748))  # This record is now life-sci-relevant
def test_replace_classes(self):
    # Covers deletion of obsolete classes, insertion of new classes, and modification of existing records
    doc_id = 19
    loader = DataLoader(self.db, self.test_classifications, overwrite=True)

    loader.load_biblio('data/biblio_typical.json')
    classes_before = (
        (DocumentClass.IPC, []),
        (DocumentClass.ECLA, []),
        (DocumentClass.IPCR, ["H04W 68/02"]),
        (DocumentClass.CPC, ["H04W 68/005"]),
    )
    for scheme, expected in classes_before:
        self.verify_classes(doc_id, scheme, expected)

    loader.load_biblio('data/biblio_typical_update.json')
    classes_after = (
        (DocumentClass.IPC, ["TEST_CLASS1"]),  # Doc now has one IPC class (insertion - life sci relevant)
        (DocumentClass.ECLA, ["H04W 76/02"]),  # Doc now has one ECLA class (insertion)
        (DocumentClass.IPCR, ["B32B 37/12", "H04W 68/02"]),  # Doc now has one extra IPCR (1 unchanged, 1 insert)
        (DocumentClass.CPC, []),  # Doc now has zero CPC classes (deletion)
    )
    for scheme, expected in classes_after:
        self.verify_classes(doc_id, scheme, expected)
def test_chems_loaded_for_existing_docs(self):
    # Load the biblio file once through a separate loader, then load the same
    # biblio file together with a chemistry file via self.load, and check the
    # resulting chemistry mapping.
    biblio_path = 'data/biblio_typical.json'
    chem_path = 'data/chem_single_row_alternative.tsv'

    first_pass_loader = DataLoader(self.db, self.test_classifications)
    first_pass_loader.load_biblio(biblio_path)

    self.load([biblio_path, chem_path])

    expected_mappings = [(15, 7676, 88, 77, 66, 55, 44, 33)]
    self.verify_chem_mappings(expected_mappings)
def prepare_updatable_db(self, overwrite_mode):
    """Load the typical biblio and chemistry files and return the loader used.

    overwrite_mode is forwarded to DataLoader as its ``overwrite`` argument.
    Before returning, the chemistry mappings are verified with doc=1
    (presumably restricting the check to document 1 - confirm against
    verify_chem_mappings).
    """
    input_files = ['data/biblio_typical.json', 'data/chem_typical.tsv']
    expected_mappings = [
        (1, 9724, 0, 0, 0, 1, 0, 0),
        (1, 23780, 0, 0, 0, 11, 0, 0),
        (1, 23781, 0, 0, 0, 11, 0, 0),
        (1, 25640, 0, 0, 2, 4, 0, 0),
    ]

    loader = DataLoader(self.db, self.test_classifications, overwrite=overwrite_mode)
    self.load(input_files, loader=loader)
    self.verify_chem_mappings(expected_mappings, doc=1)
    return loader
def test_disable_classifications(self):
    # With load_classifications=False, loading biblio data must leave the
    # schembl_document_class table empty.
    simple_loader = DataLoader(self.db, self.test_classifications, load_classifications=False)
    simple_loader.load_biblio('data/biblio_typical.json')

    rows = self.query_all(['schembl_document_class']).fetchall()
    self.assertEqual(0, len(rows))
def test_classifications_set(self):
    # A loader built without explicit classifications reports the default
    # set; the fixture loader reports the classifications it was given.
    default_classes = set([
        "A01", "A23", "A24", "A61", "A62B",
        "C05", "C06", "C07", "C08", "C09", "C10", "C11", "C12", "C13", "C14",
        "G01N",
    ])

    local_loader = DataLoader(self.db)
    self.assertEqual(default_classes, local_loader.relevant_classifications())
    self.assertEqual(self.test_classifications, self.loader.relevant_classifications())