def do_comparison(good_record, test_record):
    """Compare two records to see if they are the same.

    This compares the two GenBank records line by line and raises an
    AssertionError as soon as two lines do not match, showing the
    non-matching lines. Runs of whitespace are collapsed before comparing,
    so lines differing only in spacing are treated as equal.

    Arguments:
     - good_record - string holding the expected record text.
     - test_record - string holding the record text under test.

    Raises AssertionError if either record has extra trailing lines or if
    a pair of corresponding lines differs.
    """
    good_handle = StringIO(good_record)
    test_handle = StringIO(test_record)

    while True:
        good_line = good_handle.readline()
        test_line = test_handle.readline()

        # readline() returns "" only at end of input; blank lines are "\n".
        if not good_line and not test_line:
            break
        if not good_line:
            raise AssertionError("Extra info in Test: %r" % test_line)
        if not test_line:
            raise AssertionError("Extra info in Expected: %r" % good_line)

        # split() with no argument never yields empty strings, so no
        # additional filtering is needed when normalising whitespace.
        test_normalized = " ".join(test_line.split())
        good_normalized = " ".join(good_line.split())

        # Raise explicitly rather than using an assert statement, which
        # would be removed when Python runs with -O.
        if test_normalized != good_normalized:
            raise AssertionError(
                "Expected does not match Test.\nExpect: %r\nTest: %r\n"
                % (good_line, test_line)
            )
def do_comparison(good_record, test_record):
    """Compare two records to see if they are the same.

    The two GenBank records are read in lockstep, one line at a time. Each
    pair of lines is compared after collapsing runs of whitespace to single
    spaces. An AssertionError is raised for the first pair that differs
    (showing both original lines), or if one record runs out of lines
    before the other.

    Arguments:
     - good_record - the expected record, as a string.
     - test_record - the record being checked, as a string.
    """
    good_handle = StringIO(good_record)
    test_handle = StringIO(test_record)

    while True:
        good_line = good_handle.readline()
        test_line = test_handle.readline()

        # An empty string from readline() signals end of input.
        if not (good_line or test_line):
            return
        if not good_line:
            raise AssertionError("Extra info in Test: %r" % test_line)
        if not test_line:
            raise AssertionError("Extra info in Expected: %r" % good_line)

        # str.split() without arguments already discards empty fields.
        if " ".join(test_line.split()) == " ".join(good_line.split()):
            continue

        # An explicit raise keeps this check active under "python -O",
        # where assert statements are compiled away.
        raise AssertionError(
            "Expected does not match Test.\nExpect: %r\nTest: %r\n"
            % (good_line, test_line)
        )
def test_long_names(self):
    """Various GenBank names which push the column based LOCUS line.

    For each (name, length) case a copy of a reference record is given that
    identifier and an all-N sequence of that length, written out in GenBank
    format, and read back; the name and length must survive the round trip.
    """
    original = SeqIO.read("GenBank/iro.gb", "gb")
    self.assertEqual(len(original), 1326)

    # Acceptability of LOCUS line with length > 80 invalidates some of
    # these tests, hence every case is currently flagged as acceptable.
    cases = [
        ("short", 1, True),
        ("max_length_of_16", 1000, True),
        ("overly_long_at_17", 1000, True),
        ("excessively_long_at_22", 99999, True),
        ("excessively_long_at_22", 100000, True),
        ("pushing_the_limits_at_24", 999, True),
        ("pushing_the_limits_at_24", 1000, True),
        ("old_max_name_length_was_26", 10, True),  # 2 digits
        ("old_max_name_length_was_26", 9, True),  # 1 digit
    ]

    for locus_name, length, expect_ok in cases:
        # Make the length match the desired target
        record = original[:]
        # TODO - Implement Seq * int
        record.seq = Seq("N" * length, original.seq.alphabet)
        # Set the identifier to the desired name
        record.id = locus_name
        record.name = locus_name

        # Attempt to output the record...
        if not expect_ok:
            # e.g. ValueError: Locus identifier 'excessively_long_at_22'
            # is too long
            self.assertRaises(ValueError, record.format, "gb")
            continue

        with warnings.catch_warnings():
            # e.g. BiopythonWarning: Stealing space from length field to
            # allow long name in LOCUS line
            warnings.simplefilter("ignore", BiopythonWarning)
            handle = StringIO()
            written = SeqIO.write(record, handle, "gb")
            self.assertEqual(1, written)

        handle.seek(0)
        line = handle.readline()
        self.assertIn(" %s " % locus_name, line)
        self.assertIn(" %i bp " % length, line)

        # Splitting based on whitespace rather than position due to
        # updated GenBank specification
        fields = line.split()
        self.assertEqual(fields[1:3], [locus_name, str(length)], line)

        handle.seek(0)
        with warnings.catch_warnings():
            # e.g. BiopythonParserWarning: GenBank LOCUS line
            # identifier over 16 characters
            warnings.simplefilter("ignore", BiopythonWarning)
            new = SeqIO.read(handle, "gb")
        self.assertEqual(locus_name, new.name)
        self.assertEqual(length, len(new))