def setup_perfect_matches_and_transfers(session, batch_name):
    """Populate ``session`` with a four-accession fixture in ``batch_name``.

    The accessions added are:

    * one with a single restore built by ``create_perfect_match``;
    * one with two such restores;
    * one with a single such restore whose filepath is also used to build
      a transfer;
    * one with no restore at all.

    Once everything is added, ``find_perfect_matches`` is run over the
    accessions returned by ``get_accessions`` for the batch,
    ``find_transfer_matches`` is run over every stored ``Transfer``, and the
    session is committed.
    """
    # Accession with exactly one restore.
    single = AccessionBuilder().set_batch(batch_name).build()
    single_restore = create_perfect_match(single)
    session.add_all([single, single_restore])

    # Accession with two restores.
    multiple = AccessionBuilder().set_batch(batch_name).build()
    multiple_restore_a = create_perfect_match(multiple)
    multiple_restore_b = create_perfect_match(multiple)
    session.add_all([multiple, multiple_restore_a, multiple_restore_b])

    # Accession whose restore has a transfer pointing at the same filepath.
    transferred = AccessionBuilder().set_batch(batch_name).build()
    transferred_restore = create_perfect_match(transferred)
    transfer = TransferBuilder().set_filepath(
        transferred_restore.filepath).build()
    session.add_all([transferred, transferred_restore, transfer])

    # Accession with no restore.
    session.add(AccessionBuilder().set_batch(batch_name).build())

    batch_accessions = get_accessions(session, batch_name)
    find_perfect_matches(session, batch_accessions)
    find_transfer_matches(session, session.query(Transfer).all())
    session.commit()
def test_batch_with_accession_with_perfect_match(self):
    # Deleting a batch removes its accession and leaves the restore that was
    # matched to it with an empty perfect_matches collection.
    db = Session()
    doomed = AccessionBuilder().set_batch("batch_to_delete").build()
    matched_restore = create_perfect_match(doomed)
    db.add_all([doomed, matched_restore])
    db.commit()

    found = find_perfect_matches(db, db.query(Accession))

    # Exactly one match, visible from both sides of the relationship.
    self.assertEqual(1, len(found))
    self.assertEqual(1, len(doomed.perfect_matches))
    self.assertEqual(1, len(matched_restore.perfect_matches))
    self.assertEqual(doomed, matched_restore.perfect_matches[0])
    self.assertEqual(matched_restore, doomed.perfect_matches[0])

    delete_accessions(db, "batch_to_delete")
    db.commit()

    remaining = db.query(Accession)
    self.assertEqual(0, remaining.count())
    self.assertEqual([], matched_restore.perfect_matches)
def test_delete_accessions_matched_accession(self):
    # Not expected in practice, but any accession can be deleted, including
    # one that already has a perfect match. The match row must go with it and
    # the deletion must be reported on the supplied stream.
    report = io.StringIO()
    db = Session()
    target = AccessionBuilder().build()
    target_restore = create_perfect_match(target)
    db.add_all([target, target_restore])

    found = find_perfect_matches(db, db.query(Accession))
    self.assertEqual(1, len(found))
    self.assertEqual(1, db.query(perfect_matches_table).count())
    db.commit()

    # Remember the primary key so the row can be looked up after deletion.
    target_id = target.id
    delete_accessions(db, [target], report)

    self.assertEqual(0, db.query(perfect_matches_table).count())
    surviving = db.query(Accession).filter(Accession.id == target_id)
    self.assertEqual(0, surviving.count())

    # Report layout: blank line, header, then "<batch>,<relpath>".
    lines = report.getvalue().split('\n')
    self.assertEqual("", lines[0].strip())
    self.assertEqual("---- Deleting accessions ----", lines[1].strip())
    self.assertEqual(f"{target.batch},{target.relpath}", lines[2].strip())
def test_deleting_accession_using_raw_sql_should_delete_perfect_match(self):
    # A DELETE issued as raw SQL (bypassing the ORM) must still remove the
    # perfect-match row, i.e. the ON DELETE CASCADE is what is under test.
    db = Session()
    doomed = AccessionBuilder().set_batch("batch_to_delete").build()
    matched_restore = create_perfect_match(doomed)
    db.add_all([doomed, matched_restore])
    db.commit()

    found = find_perfect_matches(db, db.query(Accession))
    db.commit()

    self.assertEqual(1, len(found))
    self.assertEqual(1, len(doomed.perfect_matches))
    self.assertEqual(1, len(matched_restore.perfect_matches))

    # Raw SQL on purpose: the ORM must not get a chance to clean up.
    db.execute("DELETE FROM accessions where accessions.batch = 'batch_to_delete'")
    db.commit()

    accession_rows = db.query(Accession).count()
    restore_rows = db.query(Restore).count()
    match_rows = db.query(perfect_matches_table).count()

    self.assertEqual(0, accession_rows)
    self.assertEqual(1, restore_rows)  # Restores are not affected
    self.assertEqual(0, match_rows)
    self.assertEqual([], matched_restore.perfect_matches)
def test_multiple_perfect_matches_to_multiple_accession(self):
    # Two accessions sharing md5, size and filename must each match both
    # restores, giving four matches in total.
    session = Session()
    accession1 = AccessionBuilder().build()
    # Second accession copies the fields set below from the first one.
    accession2 = (
        AccessionBuilder()
        .set_md5(accession1.md5)
        .set_bytes(accession1.bytes)
        .set_filename(accession1.filename)
        .build()
    )
    restore1 = create_perfect_match(accession1)
    restore2 = create_perfect_match(accession2)

    session.add(accession1)
    session.add(accession2)
    session.add(restore1)
    session.add(restore2)
    session.commit()

    accessions = session.query(Accession)
    new_matches_found = find_perfect_matches(session, accessions)

    self.assertEqual(4, len(new_matches_found))
    self.assertEqual(2, len(accession1.perfect_matches))
    self.assertEqual(2, len(accession2.perfect_matches))
    self.assertEqual(2, len(restore1.perfect_matches))
    # The original only checked restore1's count; restore2 is checked too so
    # that a duplicated or stray match on that side cannot go unnoticed.
    self.assertEqual(2, len(restore2.perfect_matches))

    self.assertIn(accession1, restore1.perfect_matches)
    self.assertIn(accession2, restore1.perfect_matches)
    self.assertIn(accession1, restore2.perfect_matches)
    self.assertIn(accession2, restore2.perfect_matches)
def test_deleting_accession_using_orm_should_delete_perfect_match(self):
    # Deleting an accession through the ORM must also remove its row in the
    # perfect-match table while leaving the restore in place.
    db = Session()
    doomed = AccessionBuilder().set_batch("batch_to_delete").build()
    matched_restore = create_perfect_match(doomed)
    db.add_all([doomed, matched_restore])
    db.commit()

    found = find_perfect_matches(db, db.query(Accession))
    db.commit()

    self.assertEqual(1, len(found))
    self.assertEqual(1, len(doomed.perfect_matches))
    self.assertEqual(1, len(matched_restore.perfect_matches))

    # Delete using SQLAlchemy ORM
    db.delete(doomed)
    db.commit()

    accession_rows = db.query(Accession).count()
    restore_rows = db.query(Restore).count()
    match_rows = db.query(perfect_matches_table).count()

    self.assertEqual(0, accession_rows)
    self.assertEqual(1, restore_rows)  # Restores are not affected
    self.assertEqual(0, match_rows)
    self.assertEqual([], matched_restore.perfect_matches)
def test_accession_with_perfect_match_and_altered_md5_match_and_filename_only_match(self):
    # One accession, three restores: each finder must pick up exactly one
    # restore and attach it to the corresponding collection.
    db = Session()
    accession = AccessionBuilder().build()

    # Restore left exactly as create_perfect_match produced it.
    exact = create_perfect_match(accession)

    # Restore whose md5 has been overwritten.
    md5_changed = create_perfect_match(accession)
    md5_changed.md5 = 'altered_md5'

    # Restore whose md5 and byte count have both been changed.
    name_only = create_perfect_match(accession)
    name_only.md5 = 'filename_only_md5'
    name_only.bytes += 100

    db.add_all([accession, exact, md5_changed, name_only])
    db.commit()

    # The same query object is handed to all three finders.
    all_accessions = db.query(Accession)
    perfect_found = find_perfect_matches(db, all_accessions)
    altered_found = find_altered_md5_matches(db, all_accessions)
    name_only_found = find_filename_only_matches(db, all_accessions)

    self.assertEqual(1, len(perfect_found))
    self.assertEqual(1, len(altered_found))
    self.assertEqual(1, len(name_only_found))

    self.assertEqual(exact, accession.perfect_matches[0])
    self.assertEqual(md5_changed, accession.altered_md5_matches[0])
    self.assertEqual(name_only, accession.filename_only_matches[0])
def test_find_untransferred_accessions_with_mixed_batch(self):
    # In a batch holding one transferred and one untransferred accession,
    # only the untransferred one must be reported.
    batch = "mixed_batch"
    db = Session()

    # Accession whose restore has a transfer with the same filepath.
    moved = AccessionBuilder().set_batch(batch).build()
    moved_restore = create_perfect_match(moved)
    moved_transfer = TransferBuilder().set_filepath(
        moved_restore.filepath).build()
    db.add_all([moved, moved_restore, moved_transfer])

    # Accession whose restore has no transfer.
    pending = AccessionBuilder().set_batch(batch).build()
    pending_restore = create_perfect_match(pending)
    db.add_all([pending, pending_restore])

    find_perfect_matches(db, [moved, pending])
    find_transfer_matches(db, [moved_transfer])
    db.commit()

    # Verify setup
    self.assertEqual(1, len(moved.perfect_matches))
    self.assertEqual(1, len(moved_restore.transfers))
    self.assertEqual(1, len(pending.perfect_matches))
    self.assertEqual(0, len(pending_restore.transfers))

    result = find_untransferred_accessions(db, batch)

    self.assertEqual(1, len(result))
    self.assertEqual(pending.id, result[0].id)
def test_finding_perfect_matches_more_than_once(self):
    # Running the finder a second time must report no new matches and must
    # not duplicate the existing one.
    db = Session()
    accession = AccessionBuilder().build()
    restore = create_perfect_match(accession)
    db.add_all([accession, restore])
    db.commit()

    # First pass: the match is discovered.
    first_pass = find_perfect_matches(db, db.query(Accession))
    self.assertEqual(1, len(first_pass))
    self.assertEqual(1, len(accession.perfect_matches))
    self.assertEqual(1, len(restore.perfect_matches))

    # Second pass over a fresh query: nothing new, collections unchanged.
    second_pass = find_perfect_matches(db, db.query(Accession))
    self.assertEqual(0, len(second_pass))
    self.assertEqual(1, len(accession.perfect_matches))
    self.assertEqual(1, len(restore.perfect_matches))
def setUp(self):
    # Build the schema, then store two fixtures on the test case: an
    # accession in "transferred_batch" whose restore has a transfer, and an
    # accession in "untransferred_batch" whose restore has none.
    create_test_engine()
    bind = Session().get_bind()
    Base.metadata.create_all(bind)

    db = Session()

    self.transferred_accession = (
        AccessionBuilder().set_batch("transferred_batch").build())
    self.transferred_restore = create_perfect_match(
        self.transferred_accession)
    self.transfer = (
        TransferBuilder()
        .set_filepath(self.transferred_restore.filepath)
        .build())
    db.add_all([
        self.transferred_accession,
        self.transferred_restore,
        self.transfer,
    ])

    self.untransferred_accession = (
        AccessionBuilder().set_batch("untransferred_batch").build())
    self.untransferred_restore = create_perfect_match(
        self.untransferred_accession)
    db.add_all([self.untransferred_accession, self.untransferred_restore])

    fixture_accessions = [
        self.transferred_accession,
        self.untransferred_accession,
    ]
    find_perfect_matches(db, fixture_accessions)
    find_transfer_matches(db, [self.transfer])
    db.commit()

    # Sanity-check the fixture before any test relies on it.
    self.assertEqual(1, len(self.transferred_accession.perfect_matches))
    self.assertEqual(1, len(self.transferred_restore.transfers))
    self.assertEqual(1, len(self.untransferred_accession.perfect_matches))
    self.assertEqual(0, len(self.untransferred_restore.transfers))
def test_one_matched_and_one_unmatched_accession(self):
    # With one matched and one unmatched accession in the same batch, only
    # the unmatched one must be returned by unmatched_accessions.
    db = Session()
    batch_name = "TestBatch"

    # Accession with no restore at all.
    orphan = AccessionBuilder().set_batch(batch_name).build()
    db.add(orphan)

    # Accession with a restore built by create_perfect_match.
    paired = AccessionBuilder().set_batch(batch_name).build()
    paired_restore = create_perfect_match(paired)
    db.add_all([paired, paired_restore])

    found = find_perfect_matches(db, db.query(Accession))
    self.assertEqual(1, len(found))
    db.commit()

    unmatched = unmatched_accessions(db, batch_name)
    self.assertEqual(1, len(unmatched))
    self.assertIn(orphan, unmatched)
def test_no_perfect_match(self):
    # An independently built accession and restore must not be matched.
    db = Session()
    accession = AccessionBuilder().build()
    restore = RestoreBuilder().build()
    db.add_all([accession, restore])
    db.commit()

    # Precondition: at the very least the MD5 checksums differ.
    self.assertNotEqual(accession.md5, restore.md5)

    found = find_perfect_matches(db, db.query(Accession))

    # Nothing found, and neither side gained a match.
    self.assertEqual(0, len(found))
    self.assertEqual(0, len(accession.perfect_matches))
    self.assertEqual(0, len(restore.perfect_matches))
def test_same_md5_but_not_same_filename(self):
    # Equal checksums are not enough: a differing filename must prevent a
    # perfect match.
    db = Session()
    accession = AccessionBuilder().set_filename('foo.txt').build()
    renamed_restore = create_perfect_match(accession)
    renamed_restore.filename = 'bar.txt'
    db.add_all([accession, renamed_restore])
    db.commit()

    # Precondition: same MD5, different filename.
    self.assertEqual(accession.md5, renamed_restore.md5)
    self.assertNotEqual(accession.filename, renamed_restore.filename)

    found = find_perfect_matches(db, db.query(Accession))

    self.assertEqual(0, len(found))
    self.assertEqual(0, len(accession.perfect_matches))
    self.assertEqual(0, len(renamed_restore.perfect_matches))
def test_multiple_perfect_matches_to_one_accession(self):
    # A single accession with two restores must be matched to both, and each
    # restore must point back at that one accession.
    db = Session()
    accession = AccessionBuilder().build()
    first_restore = create_perfect_match(accession)
    second_restore = create_perfect_match(accession)
    db.add_all([accession, first_restore, second_restore])
    db.commit()

    found = find_perfect_matches(db, db.query(Accession))

    self.assertEqual(2, len(found))
    self.assertEqual(2, len(accession.perfect_matches))

    self.assertEqual(1, len(first_restore.perfect_matches))
    self.assertEqual(accession, first_restore.perfect_matches[0])

    self.assertEqual(1, len(second_restore.perfect_matches))
    self.assertEqual(accession, second_restore.perfect_matches[0])
def test_deleting_accession_using_raw_sql_should_delete_perfect_match_not_affect_other_matches(self):
    # A raw-SQL DELETE of one batch must cascade to that batch's match row
    # only; the other batch's accession, restore and match must survive.
    db = Session()
    doomed = AccessionBuilder().set_batch("batch_to_delete").build()
    doomed_restore = create_perfect_match(doomed)
    kept = AccessionBuilder().set_batch("batch_to_preserve").build()
    kept_restore = create_perfect_match(kept)
    db.add_all([doomed, doomed_restore, kept, kept_restore])
    db.commit()

    found = find_perfect_matches(db, db.query(Accession))
    db.commit()

    self.assertEqual(2, len(found))
    self.assertEqual(1, len(doomed.perfect_matches))
    self.assertEqual(1, len(doomed_restore.perfect_matches))
    self.assertEqual(1, len(kept.perfect_matches))
    self.assertEqual(1, len(kept_restore.perfect_matches))

    # Delete using raw SQL, not SQLAlchemy, to test ON DELETE CASCADE
    db.connection().execute(
        "DELETE FROM accessions where accessions.batch = 'batch_to_delete'")
    db.commit()

    accession_rows = db.query(Accession).count()
    restore_rows = db.query(Restore).count()
    match_rows = db.query(perfect_matches_table).count()

    self.assertEqual(1, accession_rows)
    self.assertEqual(2, restore_rows)  # Restores are not affected
    self.assertEqual(1, match_rows)

    self.assertEqual([], doomed_restore.perfect_matches)
    self.assertIn(kept, kept_restore.perfect_matches)
    self.assertIn(kept_restore, kept.perfect_matches)