def test_006_dbtodat(self):
    """
    Compare the features of HLA-A*01:01:01:01 obtained from two sources.

    One ReferenceData is built on top of a BioSQL server and the allele is
    looked up in the server database; the other is built without a server
    and the allele is read from its ``hlaref`` mapping. Every feature found
    via ``hlaref`` must exist in the database record with an identical
    string representation.
    """
    allele = 'HLA-A*01:01:01:01'
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Close the connection even when an assertion fails.
    try:
        refdata1 = ReferenceData(server=server)
        refdata2 = ReferenceData()
        datseqs = refdata2.hlaref[allele]
        db = refdata1.server[refdata1.dbversion + "_" + 'A']
        expected = db.lookup(name=allele)

        expected_seqs1 = get_features(datseqs)
        expected_seqs2 = get_features(expected)
        for feat in expected_seqs1:
            self.assertIn(feat, expected_seqs2)
            self.assertEqual(str(expected_seqs1[feat]),
                             str(expected_seqs2[feat]))
    finally:
        server.close()
def test_012_exact(self):
    """
    Annotate each sequence listed in ``self.expected['exact']`` and check
    that it is reported as an exact match.

    For every expected entry the annotation must be exact, complete, use
    the "match" method, carry the expected GFE, and have the same feature
    strings as the allele stored in ``seqann.refdata.hlaref``.
    """
    seqann = BioSeqAnn(verbose=False, verbosity=verbosity, pid="007_exact")
    input_seq = self.data_dir + '/exact_seqs.fasta'
    # Parse the FASTA file once rather than once per expected entry.
    in_seqs = list(SeqIO.parse(input_seq, "fasta"))

    for ex in self.expected['exact']:
        locus = ex['locus']
        allele = ex['name']
        in_seq = in_seqs[int(ex['index'])]

        annotation = seqann.annotate(in_seq, locus)
        self.assertTrue(annotation.exact)
        self.assertIsNone(annotation.features)
        self.assertEqual(annotation.method, "match")
        self.assertIsInstance(annotation, Annotation)
        self.assertTrue(annotation.complete_annotation)
        self.assertGreater(len(annotation.annotation.keys()), 1)

        expected = seqann.refdata.hlaref[allele]
        expected_seqs = get_features(expected)
        self.assertGreater(len(annotation.structure), 1)
        for feat in annotation.structure:
            self.assertIsInstance(feat, Feature)
        self.assertEqual(annotation.gfe, ex['gfe'])
        self.assertGreater(len(expected_seqs.keys()), 1)

        for feat in expected_seqs:
            self.assertIn(feat, annotation.annotation)
            self.assertEqual(str(expected_seqs[feat]),
                             str(annotation.annotation[feat]))
def test_005_select(self):
    """
    Check ``ReferenceData.search_refdata`` on a ReferenceData built
    without a server.

    ``hlaref`` and ``seqref`` must be populated, and every sequence listed
    in ``self.expected['exact']`` must come back as a complete "match"
    annotation whose feature strings equal those of the allele stored in
    ``refdata.hlaref``.
    """
    refdata = ReferenceData()
    input_seq = self.data_dir + '/exact_seqs.fasta'
    self.assertTrue(refdata.hlaref)
    self.assertTrue(refdata.seqref)
    # Parse the FASTA file once rather than once per expected entry.
    in_seqs = list(SeqIO.parse(input_seq, "fasta"))

    for ex in self.expected['exact']:
        locus = ex['locus']
        allele = ex['name']
        in_seq = in_seqs[int(ex['index'])]

        annotation = refdata.search_refdata(in_seq, locus)
        self.assertIsNone(annotation.features)
        self.assertEqual(annotation.method, "match")
        self.assertIsInstance(annotation, Annotation)
        self.assertTrue(annotation.complete_annotation)
        self.assertGreater(len(annotation.annotation.keys()), 1)

        expected = refdata.hlaref[allele]
        expected_seqs = get_features(expected)
        for feat in expected_seqs:
            self.assertIn(feat, annotation.annotation)
            self.assertEqual(str(expected_seqs[feat]),
                             str(annotation.annotation[feat]))
def test_004_selectserv(self):
    """
    Check ``ReferenceData.search_refdata`` on a ReferenceData backed by a
    BioSQL server.

    With a server, ``hlaref`` and ``seqref`` must be empty. Every sequence
    listed in ``self.expected['exact']`` must come back as a complete
    "match" annotation whose feature strings equal those of the record
    looked up in the server database for that locus.
    """
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Close the connection even when an assertion fails.
    try:
        refdata = ReferenceData(server=server)
        input_seq = self.data_dir + '/exact_seqs.fasta'
        self.assertFalse(refdata.hlaref)
        self.assertFalse(refdata.seqref)
        # Parse the FASTA file once rather than once per expected entry.
        in_seqs = list(SeqIO.parse(input_seq, "fasta"))

        for ex in self.expected['exact']:
            locus = ex['locus']
            allele = ex['name']
            _, loc = locus.split("-")
            in_seq = in_seqs[int(ex['index'])]

            annotation = refdata.search_refdata(in_seq, locus)
            self.assertIsNone(annotation.features)
            self.assertEqual(annotation.method, "match")
            self.assertIsInstance(annotation, Annotation)
            self.assertTrue(annotation.complete_annotation)
            self.assertGreater(len(annotation.annotation.keys()), 1)

            db = refdata.server[refdata.dbversion + "_" + loc]
            expected = db.lookup(name=allele)
            expected_seqs = get_features(expected)
            for feat in expected_seqs:
                self.assertIn(feat, annotation.annotation)
                self.assertEqual(str(expected_seqs[feat]),
                                 str(annotation.annotation[feat]))
    finally:
        server.close()
def test_009_partialambigserv(self):
    """
    Annotate the sequences listed in ``self.expected['partial_ambig']``
    with a server-backed BioSeqAnn.

    Each annotation must be complete, have no blocks, use the expected
    method and GFE, contain none of the entry's ``missing_feats``, and for
    every feature in ``feats`` differ from the database record exactly
    when the feature is listed in ``diff``.
    """
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Close the connection even when an assertion fails.
    try:
        seqann = BioSeqAnn(server=server,
                           verbose=False,
                           verbosity=verbosity,
                           pid="006_partialambig")
        input_seq = self.data_dir + '/partial_ambig.fasta'
        # Parse the FASTA file once rather than once per expected entry.
        in_seqs = list(SeqIO.parse(input_seq, "fasta"))

        for ex in self.expected['partial_ambig']:
            locus = ex['locus']
            allele = ex['name']
            _, loc = locus.split("-")
            in_seq = in_seqs[int(ex['index'])]

            ann = seqann.annotate(in_seq, locus)
            self.assertTrue(ann.complete_annotation)
            self.assertEqual(ann.method, ex['method'])
            self.assertFalse(ann.blocks)
            self.assertIsInstance(ann, Annotation)
            self.assertGreater(len(ann.annotation.keys()), 1)

            db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
            expected = db.lookup(name=allele)
            expected_seqs = get_features(expected)
            self.assertGreater(len(expected_seqs.keys()), 1)
            self.assertEqual(ann.gfe, ex['gfe'])

            self.assertGreater(len(ann.structure), 1)
            for feat in ann.structure:
                self.assertIsInstance(feat, Feature)

            # Make sure only mapped feats exist
            for mf in ex['missing_feats']:
                self.assertNotIn(mf, ann.annotation)

            for feat in ex['feats']:
                ref_seq = str(expected_seqs[feat])
                ann_seq = str(ann.annotation[feat].seq)
                if feat in ex['diff']:
                    self.assertNotEqual(ref_seq, ann_seq)
                else:
                    self.assertEqual(ref_seq, ann_seq)
    finally:
        server.close()
def test_005_insertionserv(self):
    """
    Annotate the sequences listed in ``self.expected['insertion']`` with
    a server-backed BioSeqAnn.

    Each annotation must be complete, use "nt_search", and have nothing
    missing and no blocks. Features listed in the entry's ``diff`` must
    differ from the database record by the length given in ``lengths``;
    all other features must be identical. The number of differing
    features must equal ``len(ex['diff'])``.
    """
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Close the connection even when an assertion fails.
    try:
        seqann = BioSeqAnn(server=server,
                           verbose=False,
                           verbosity=verbosity,
                           pid="004_insertion")
        input_seq = self.data_dir + '/insertion_seqs.fasta'
        # Parse the FASTA file once rather than once per expected entry.
        in_seqs = list(SeqIO.parse(input_seq, "fasta"))

        for ex in self.expected['insertion']:
            locus = ex['locus']
            allele = ex['name']
            _, loc = locus.split("-")
            in_seq = in_seqs[int(ex['index'])]

            ann = seqann.annotate(in_seq, locus)
            self.assertEqual(ann.method, "nt_search")
            self.assertFalse(ann.missing)
            self.assertFalse(ann.blocks)
            self.assertIsInstance(ann, Annotation)
            self.assertTrue(ann.complete_annotation)
            self.assertGreater(len(ann.annotation.keys()), 1)

            db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
            expected = db.lookup(name=allele)
            self.assertEqual(ann.gfe, ex['gfe'])

            self.assertGreater(len(ann.structure), 1)
            for feat in ann.structure:
                self.assertIsInstance(feat, Feature)

            n_diffs = 0
            expected_seqs = get_features(expected)
            self.assertGreater(len(expected_seqs.keys()), 1)
            for feat in expected_seqs:
                self.assertIn(feat, ann.annotation)
                ref_seq = str(expected_seqs[feat])
                ann_seq = str(ann.annotation[feat].seq)
                if feat in ex['diff']:
                    n_diffs += 1
                    self.assertNotEqual(ref_seq, ann_seq)
                    diff_len = len(ann_seq) - len(ref_seq)
                    self.assertEqual(diff_len, ex['lengths'][feat])
                else:
                    self.assertEqual(ref_seq, ann_seq)
            self.assertEqual(n_diffs, len(ex['diff']))
    finally:
        server.close()
def test_021_stringseq(self):
    """
    Check that ``BioSeqAnn.annotate`` accepts both a plain string and the
    record's ``.seq`` object as input.

    The first record of ``exact_seqs.fasta`` is annotated twice, once as
    ``str`` and once as its ``.seq`` attribute. Both results must be
    exact, complete "match" annotations with the expected GFE and with
    feature strings equal to the database record of the expected allele.
    """
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Close the connection even when an assertion fails.
    try:
        seqann = BioSeqAnn(server=server,
                           verbose=False,
                           verbosity=verbosity,
                           pid="015_stringseq")
        input_seq = self.data_dir + '/exact_seqs.fasta'
        ex = self.expected['exact'][0]
        locus = ex['locus']
        allele = ex['name']
        _, loc = locus.split("-")

        in_seqrec = list(SeqIO.parse(input_seq, "fasta"))[0]
        in_str = str(in_seqrec.seq)
        in_seq = in_seqrec.seq
        ann_str = seqann.annotate(in_str, locus)
        ann_seq = seqann.annotate(in_seq, locus)

        # The reference record is the same for both annotations, so it is
        # looked up once.
        db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
        expected = db.lookup(name=allele)
        expected_seqs = get_features(expected)
        self.assertGreater(len(expected_seqs.keys()), 1)

        for annotation in [ann_str, ann_seq]:
            self.assertTrue(annotation.exact)
            self.assertIsNone(annotation.features)
            self.assertEqual(annotation.method, "match")
            self.assertIsInstance(annotation, Annotation)
            self.assertTrue(annotation.complete_annotation)
            self.assertGreater(len(annotation.annotation.keys()), 1)
            self.assertEqual(annotation.gfe, ex['gfe'])

            self.assertGreater(len(annotation.structure), 1)
            for feat in annotation.structure:
                self.assertIsInstance(feat, Feature)

            for feat in expected_seqs:
                self.assertIn(feat, annotation.annotation)
                self.assertEqual(str(expected_seqs[feat]),
                                 str(annotation.annotation[feat]))
    finally:
        server.close()
def seqannotation(self, seqrecord, allele, loc):
    """
    Build a complete "match" Annotation from a found sequence record.

    The features extracted from ``seqrecord`` become the annotation. When
    ``self.alignments`` is truthy, the ``'Seq'`` entry of every feature
    stored under ``self.annoated_alignments[loc][allele]`` is attached as
    ``annotation.aligned``.

    :param seqrecord: The record of the found sequence
    :param allele: Allele name used to index the annotated alignments
    :param loc: Locus key used to index the annotated alignments
    :return: The Annotation from the found sequence
    :rtype: Annotation
    """
    result = Annotation(annotation=get_features(seqrecord),
                        method='match',
                        complete_annotation=True)
    if not self.alignments:
        return result

    aligned_feats = {}
    for feat in self.annoated_alignments[loc][allele].keys():
        aligned_feats[feat] = self.annoated_alignments[loc][allele][feat]['Seq']
    result.aligned = aligned_feats
    return result
def test_006_insertion(self):
    """
    Annotate the sequences listed in ``self.expected['insertion']`` with
    a BioSeqAnn built without a server.

    Each annotation must be complete, use "nt_search", and have nothing
    missing and no blocks. Features listed in the entry's ``diff`` must
    differ from the ``hlaref`` allele by the length given in ``lengths``;
    all other features must be identical. The number of differing
    features must equal ``len(ex['diff'])``.
    """
    seqann = BioSeqAnn(verbosity=verbosity, pid="004_insertion")
    input_seq = self.data_dir + '/insertion_seqs.fasta'
    # Parse the FASTA file once rather than once per expected entry.
    in_seqs = list(SeqIO.parse(input_seq, "fasta"))

    for ex in self.expected['insertion']:
        locus = ex['locus']
        allele = ex['name']
        in_seq = in_seqs[int(ex['index'])]

        ann = seqann.annotate(in_seq, locus)
        self.assertEqual(ann.method, "nt_search")
        self.assertFalse(ann.missing)
        self.assertFalse(ann.blocks)
        self.assertIsInstance(ann, Annotation)
        self.assertTrue(ann.complete_annotation)
        self.assertGreater(len(ann.annotation.keys()), 1)

        expected = seqann.refdata.hlaref[allele]
        self.assertEqual(ann.gfe, ex['gfe'])

        self.assertGreater(len(ann.structure), 1)
        for feat in ann.structure:
            self.assertIsInstance(feat, Feature)

        n_diffs = 0
        expected_seqs = get_features(expected)
        self.assertGreater(len(expected_seqs.keys()), 1)
        for feat in expected_seqs:
            self.assertIn(feat, ann.annotation)
            ref_seq = str(expected_seqs[feat])
            ann_seq = str(ann.annotation[feat].seq)
            if feat in ex['diff']:
                n_diffs += 1
                self.assertNotEqual(ref_seq, ann_seq)
                diff_len = len(ann_seq) - len(ref_seq)
                self.assertEqual(diff_len, ex['lengths'][feat])
            else:
                self.assertEqual(ref_seq, ann_seq)
        self.assertEqual(n_diffs, len(ex['diff']))
def test_010_partialambig(self):
    """
    Annotate the sequences listed in ``self.expected['partial_ambig']``
    with a BioSeqAnn built without a server.

    Each annotation must be complete, have no blocks, use the expected
    method and GFE, contain none of the entry's ``missing_feats``, and for
    every feature in ``feats`` differ from the ``hlaref`` allele exactly
    when the feature is listed in ``diff``.
    """
    seqann = BioSeqAnn(verbose=False,
                       verbosity=verbosity,
                       pid="006_partialambig")
    input_seq = self.data_dir + '/partial_ambig.fasta'
    # Parse the FASTA file once rather than once per expected entry.
    in_seqs = list(SeqIO.parse(input_seq, "fasta"))

    for ex in self.expected['partial_ambig']:
        locus = ex['locus']
        allele = ex['name']
        in_seq = in_seqs[int(ex['index'])]

        ann = seqann.annotate(in_seq, locus)
        self.assertTrue(ann.complete_annotation)
        self.assertEqual(ann.method, ex['method'])
        self.assertFalse(ann.blocks)
        self.assertIsInstance(ann, Annotation)
        self.assertGreater(len(ann.annotation.keys()), 1)

        expected = seqann.refdata.hlaref[allele]
        expected_seqs = get_features(expected)
        self.assertGreater(len(expected_seqs.keys()), 1)
        self.assertEqual(ann.gfe, ex['gfe'])

        self.assertGreater(len(ann.structure), 1)
        for feat in ann.structure:
            self.assertIsInstance(feat, Feature)

        # Make sure only mapped feats exist
        for mf in ex['missing_feats']:
            self.assertNotIn(mf, ann.annotation)

        for feat in ex['feats']:
            ref_seq = str(expected_seqs[feat])
            ann_seq = str(ann.annotation[feat].seq)
            if feat in ex['diff']:
                self.assertNotEqual(ref_seq, ann_seq)
            else:
                self.assertEqual(ref_seq, ann_seq)
def search_seqs(self, seqrec, in_seq, locus, run=0, partial_ann=None):
    """
    search_seqs - method for annotating a BioPython sequence without alignment

    Each feature returned by ``get_features(seqrec)`` is visited in the
    order given by ``get_structures()[locus]`` and searched for in
    ``in_seq`` with ``nt_search``:

    * when the search result has exactly two items, the feature is
      recorded as found, provided none of its positions has already been
      mapped and it is not flagged as out of order relative to the
      features already found;
    * when it has more than two items, the feature is recorded as missing
      and ambiguous (with one hard-coded exception for ``exon_8``);
    * otherwise the feature is recorded as missing.

    The positions still unmapped are grouped with ``getblocks`` and, unless
    one of two hard-coded class II shortcuts returns early, passed to
    ``self._resolve_unmapped``. The result is wrapped in an Annotation.

    When ``partial_ann`` is given, ``partial_ann.features`` and
    ``partial_ann.mapping`` are reused directly and updated in place.

    :param seqrec: The reference sequence
    :type seqrec: SeqRecord
    :param in_seq: The input sequence (its ``.seq`` attribute is read)
    :type in_seq: SeqRecord
    :param locus: The gene locus associated with the sequence.
    :type locus: str
    :param run: The number of runs that have been done
    :type run: int
    :param partial_ann: A partial annotation from a previous step
    :type partial_ann: :ref:`ann`
    :return: The annotation built from the search, or ``partial_ann``
        itself when every feature of ``seqrec`` is already present in
        ``partial_ann.features``
    :rtype: :ref:`ann`

    Example usage (schematic; ``refrec`` and ``inrec`` are SeqRecords and
    ``sqsrch`` is an instance of the enclosing class):

        ann = sqsrch.search_seqs(refrec, inrec, "HLA-A")
    """
    # Extract out the sequences and feature names
    # from the reference sequences
    #
    # The mapped features will be subtracted from seq_covered
    # so the final seq_covered number will reflect the remaining
    # number of base pairs that haven't been mapped.
    #
    # The coordinates and mapping will help determine what positions
    # in the sequence have been mapped and to what features. The
    # missing blocks variable will be generated using these.
    structures = get_structures()
    seq_covered = len(in_seq.seq)

    # coordinates: positions 0..len(seq) that are still unmapped
    # (entries are deleted as features are mapped).
    coordinates = dict(
        map(lambda x: [x, 1], [i for i in range(0, len(in_seq.seq) + 1)]))

    # mapping: position -> 1 while unmapped, or the feature name once mapped.
    mapping = dict(
        map(lambda x: [x, 1], [i for i in range(0, len(in_seq.seq) + 1)]))

    ambig_map = {}
    found_feats = {}
    feat_missing = {}
    method = "nt_search" if not partial_ann else partial_ann.method

    # If the partial annotation is provided
    # then make the found_feats equal to
    # what has already been annotated
    feats = get_features(seqrec)
    if partial_ann:
        # NOTE: this aliases partial_ann.features; later updates to
        # found_feats modify the partial annotation's dict as well.
        found_feats = partial_ann.features
        if self.verbose and self.verbosity > 4:
            self.logger.info("Found partial features:")
            for f in found_feats:
                self.logger.info(f)

        # Skip references that only have features
        # that have already been annotated
        if len([f for f in feats if f in found_feats]) == len(feats):
            if self.verbose:
                self.logger.info("Skipping incomplete refseq")
            return partial_ann

        if self.verbose and self.verbosity > 1:
            self.logger.info("Using partial annotation | " + locus + " " +
                             str(len(partial_ann.features)))

        # Only the positions inside the partial annotation's blocks are
        # treated as still unmapped.
        coordinates = dict(
            map(lambda l: [l, 1], [
                item for sublist in partial_ann.blocks for item in sublist
            ]))
        seq_covered = partial_ann.covered
        # NOTE: aliases partial_ann.mapping; it is updated in place below.
        mapping = partial_ann.mapping

        if self.verbose and self.verbosity > 2:
            self.logger.info("Partial sequence coverage = " +
                             str(seq_covered))
            self.logger.info("Partial sequence metho = " + method)

    # Bookkeeping for exon_8 / three_prime_UTR so that their mapping can
    # be undone at the end ("Unmap exon 8" below).
    added_feat = {}
    deleted_coords = {}
    for feat_name in sorted(feats, key=lambda k: structures[locus][k]):

        # skip if partial annotation is provided
        # and the feat name is not one of the
        # missing features
        if partial_ann and feat_name not in partial_ann.refmissing:
            if self.verbose and self.verbosity > 1:
                self.logger.info("Skipping " + feat_name +
                                 " - Already annotated")
            continue

        if self.verbose and self.verbosity > 1:
            self.logger.info("Running seqsearch for " + feat_name)

        # Search for the reference feature sequence in the
        # input sequence. Record the coordinates if it's
        # found and if it's found in multiple spots. If it
        # is not found, then record that feature as missing.
        # NOTE(review): presumably Bio.SeqUtils.nt_search, which returns
        # [pattern, pos1, pos2, ...] - confirm against the file's imports.
        seq_search = nt_search(str(in_seq.seq), str(feats[feat_name]))

        if len(seq_search) == 2:
            if self.verbose and self.verbosity > 0:
                self.logger.info("Found exact match for " + feat_name)

            seq_covered -= len(str(feats[feat_name]))
            end = int(len(str(feats[feat_name])) + seq_search[1])

            # A three_prime_UTR is extended to the end of the input
            # sequence when the input runs past the matched region.
            if feat_name == 'three_prime_UTR' \
                    and len(str(in_seq.seq)) > end:
                end = len(str(in_seq.seq))

            # If the feature is found and it's a five_prime_UTR then
            # the start should always be 0, so insertions at the
            # beginning of the sequence will be found.
            start = seq_search[1] if feat_name != 'five_prime_UTR' else 0
            # si is the first coordinate claimed by this feature: the
            # match position + 1, or 0 when the match is at position 0
            # or the feature is the five_prime_UTR.
            si = seq_search[1]+1 if seq_search[1] != 0 and \
                feat_name != 'five_prime_UTR' else 0

            # check if this feature has already been mapped: the set
            # contains 1 when any position in [si, end] is no longer in
            # coordinates.
            mapcheck = set(
                [0 if i in coordinates else 1 for i in range(si, end + 1)])

            # Don't map features if they are out of order.
            # NOTE(review): loctype is defined elsewhere; the meaning of
            # its result is not visible here - confirm before relying on
            # this ordering check.
            skip = False
            if found_feats and len(found_feats) > 0:
                for f in found_feats:
                    o1 = structures[locus][feat_name]
                    o2 = structures[locus][f]
                    loctyp = loctype(found_feats[f].location.start,
                                     found_feats[f].location.end, start,
                                     end)
                    if o1 < o2 and loctyp:
                        skip = True
                        if self.verbose:
                            self.logger.info("Skipping map for " +
                                             feat_name)
                    elif o2 < o1 and not loctyp:
                        skip = True
                        if self.verbose:
                            self.logger.info("Skipping map for " +
                                             feat_name)

            if 1 not in mapcheck and not skip:
                for i in range(si, end + 1):
                    if i in coordinates:
                        # Remember removed coordinates for exon_8 and
                        # three_prime_UTR so they can be restored later.
                        if feat_name == "exon_8" or feat_name == 'three_prime_UTR':
                            deleted_coords.update({i: coordinates[i]})
                        del coordinates[i]
                    else:
                        if self.verbose:
                            self.logger.error(
                                "seqsearch - should't be here " + locus +
                                " - " + " - " + feat_name)
                    mapping[i] = feat_name

                found_feats.update({
                    feat_name:
                    SeqFeature(FeatureLocation(ExactPosition(start),
                                               ExactPosition(end),
                                               strand=1),
                               type=feat_name)
                })

                if feat_name == "exon_8" or feat_name == 'three_prime_UTR':
                    added_feat.update({feat_name: feats[feat_name]})

                if self.verbose and self.verbosity > 3:
                    self.logger.info("Coordinates | Start = " + str(start) +
                                     " - End = " + str(end))

        elif (len(seq_search) > 2):
            if self.verbose and self.verbosity > 1:
                self.logger.info("Found " + str(len(seq_search)) +
                                 " matches for " + feat_name)

            # Keep only the match positions that (or whose successor)
            # are still unmapped; element 0 is carried over unchanged.
            new_seq = [seq_search[0]]
            for i in range(1, len(seq_search)):
                tnp = seq_search[i] + 1
                if seq_search[i] in coordinates or tnp in coordinates:
                    new_seq.append(seq_search[i])

            seq_search = new_seq
            if (partial_ann and feat_name == "exon_8" and run > 0):
                missing_feats = sorted(list(partial_ann.missing.keys()))

                # * HARD CODED LOGIC * #
                # > exon8 in class I maps to multiple spots in a sequence,
                #   often in the 3' UTR. These features need to be mapped
                #   last to make sure it's not mapping exon8 incorrectly.
                # NOTE(review): if the filter above dropped every
                # position, seq_search has length 1, this condition still
                # passes and seq_search[1] below would raise IndexError -
                # confirm this cannot happen.
                if (missing_feats == ['exon_8', 'three_prime_UTR']
                        and len(seq_search) <= 3):
                    if self.verbose and self.verbosity > 0:
                        self.logger.info("Resolving exon_8")

                    seq_covered -= len(str(feats[feat_name]))
                    end = int(len(str(feats[feat_name])) + seq_search[1])

                    # Use the first retained match position as the start.
                    start = seq_search[1]
                    si = seq_search[1] + 1 if seq_search[1] != 0 else 0

                    # check if this feature has already been mapped
                    # NOTE(review): mapcheck is computed here but never
                    # read in this branch.
                    mapcheck = set([
                        0 if i in coordinates else 1
                        for i in range(si, end + 1)
                    ])

                    for i in range(si, end + 1):
                        if i in coordinates:
                            del coordinates[i]
                        else:
                            if self.verbose:
                                self.logger.error(
                                    "seqsearch - should't be here " +
                                    locus + " - " + " - " + feat_name)
                        mapping[i] = feat_name

                    found_feats.update({
                        feat_name:
                        SeqFeature(FeatureLocation(ExactPosition(start),
                                                   ExactPosition(end),
                                                   strand=1),
                                   type=feat_name)
                    })

                    if self.verbose and self.verbosity > 0:
                        self.logger.info("Coordinates | Start = " +
                                         str(start) + " - End = " +
                                         str(end))
                else:
                    if self.verbose and self.verbosity > 0:
                        self.logger.info("Adding ambig feature " +
                                         feat_name)
                    feat_missing.update({feat_name: feats[feat_name]})
                    ambig_map.update(
                        {feat_name: seq_search[1:len(seq_search)]})
            else:
                if self.verbose and self.verbosity > 0:
                    self.logger.info("Adding ambig feature " + feat_name)
                feat_missing.update({feat_name: feats[feat_name]})
                ambig_map.update(
                    {feat_name: seq_search[1:len(seq_search)]})
        else:
            if self.verbose and self.verbosity > 1:
                self.logger.info("No match for " + feat_name)
            feat_missing.update({feat_name: feats[feat_name]})

    # Group the coordinates that are still unmapped into blocks.
    blocks = getblocks(coordinates)
    exact_matches = list(found_feats.keys())

    # * HARD CODED LOGIC * #
    # >
    #
    #  HLA-DRB1 exon3 exact match - with intron1 and 3 missing
    # NOTE(review): this branch only runs when run == 99.
    if ('exon_3' in exact_matches and run == 99 and locus == 'HLA-DRB1'
            and 'exon_2' in feat_missing
            and (len(blocks) == 1 or len(blocks) == 2)):
        for b in blocks:
            x = b[len(b) - 1]
            # A block ending at the highest mapping key is labelled
            # intron_3; any other block is labelled exon_2.
            if x == max(list(mapping.keys())):
                featname = "intron_3"
                found_feats.update({
                    featname:
                    SeqFeature(FeatureLocation(ExactPosition(b[0] - 1),
                                               ExactPosition(b[len(b) - 1]),
                                               strand=1),
                               type=featname)
                })
            else:
                featname = "exon_2"
                found_feats.update({
                    featname:
                    SeqFeature(FeatureLocation(ExactPosition(b[0]),
                                               ExactPosition(b[len(b) - 1]),
                                               strand=1),
                               type=featname)
                })
            seq_covered -= len(b)

        if self.verbose and self.verbosity > 1:
            self.logger.info(
                "Successfully annotated class DRB1 II sequence")

        return Annotation(features=found_feats,
                          covered=seq_covered,
                          seq=in_seq,
                          missing=feat_missing,
                          ambig=ambig_map,
                          method=method,
                          mapping=mapping,
                          exact_match=exact_matches)

    # If it's a class II sequence and
    # exon_2 is an exact match
    # * HARD CODED LOGIC * #
    # > It's common for exon2 to be fully sequenced
    #   but intron_2 and intron_1 to be partially sequenced,
    #   which can make it hard to annotate those to features.
    #   If there are two missing blocks that are small enough
    #   and they are before and after exon2, then it's
    #   very likely to be intron_2 and intron_1.
    if 'exon_2' in exact_matches and len(blocks) == 2 \
            and is_classII(locus) and seq_covered < 300:
        if self.verbose and self.verbosity > 1:
            self.logger.info("Running search for class II sequence")

        # r stays True only if both blocks are directly adjacent to a
        # position mapped to exon_2.
        r = True
        for b in blocks:
            x = b[len(b) - 1]
            if x == max(list(mapping.keys())):
                # Trailing block: look at the position just before it.
                x = b[0] - 1
            else:
                # Leading block: look at the position just after it.
                x += 1
            f = mapping[x]
            if f != 'exon_2':
                r = False

        if r:
            for b in blocks:
                x = b[len(b) - 1]
                if x == max(list(mapping.keys())):
                    featname = "intron_2"
                    found_feats.update({
                        featname:
                        SeqFeature(FeatureLocation(
                            ExactPosition(b[0] - 1),
                            ExactPosition(b[len(b) - 1]),
                            strand=1),
                                   type=featname)
                    })
                else:
                    featname = "intron_1"
                    found_feats.update({
                        featname:
                        SeqFeature(FeatureLocation(
                            ExactPosition(b[0]),
                            ExactPosition(b[len(b) - 1]),
                            strand=1),
                                   type=featname)
                    })
                seq_covered -= len(b)

            if self.verbose and self.verbosity > 1:
                self.logger.info(
                    "Successfully annotated class II sequence")

            return Annotation(features=found_feats,
                              covered=seq_covered,
                              seq=in_seq,
                              missing=feat_missing,
                              ambig=ambig_map,
                              method=method,
                              mapping=mapping,
                              exact_match=exact_matches)

    # Hand the remaining blocks and missing/ambiguous features to
    # _resolve_unmapped; mb holds the blocks it reports as still missing.
    annotated_feats, mb, mapping = self._resolve_unmapped(
        blocks, feat_missing, ambig_map, mapping, found_feats, locus,
        seq_covered)

    # * HARD CODED LOGIC * #
    # If nothing is reported missing or ambiguous but unmapped blocks
    # exist, treat those blocks as the missing blocks.
    if (not mb and blocks and len(feat_missing.keys()) == 0
            and len(ambig_map.keys()) == 0):
        mb = blocks

    if mb:
        # Unmap exon 8: restore the coordinates taken by exon_8 /
        # three_prime_UTR and report those features as missing again.
        if locus in ['HLA-C', 'HLA-A'] and len(in_seq.seq) < 3000 \
                and 'exon_8' in exact_matches:
            for i in deleted_coords:
                mapping[i] = 1
            coordinates.update(deleted_coords)
            mb = getblocks(coordinates)
            feat_missing.update(added_feat)

            # Delete from found features
            del exact_matches[exact_matches.index('exon_8')]
            del found_feats['exon_8']
            if 'exon_8' in annotated_feats:
                del annotated_feats['exon_8']
            if 'three_prime_UTR' in found_feats:
                del found_feats['three_prime_UTR']
            if 'three_prime_UTR' in annotated_feats:
                del annotated_feats['three_prime_UTR']

        # Features of the locus structure that were not annotated.
        refmissing = [
            f for f in structures[locus] if f not in annotated_feats
        ]

        if self.verbose and self.verbosity > 1:
            self.logger.info("* Annotation not complete *")

        # Print out what features were missing by the ref
        if self.verbose and self.verbosity > 2:
            self.logger.info("Refseq was missing these features = " +
                             ",".join(list(refmissing)))

        # Print out what features were ambig matches
        if self.verbose and self.verbosity > 1 and len(ambig_map) > 1:
            self.logger.info("Features with ambig matches = " +
                             ",".join(list(ambig_map)))

        # Print out what features were exact matches
        if self.verbose and self.verbosity > 2 and len(exact_matches) > 1:
            self.logger.info("Features exact matches = " +
                             ",".join(list(exact_matches)))

        # Print out what features have been annotated
        if self.verbose and self.verbosity > 1 and len(
                annotated_feats) > 1:
            self.logger.info("Features annotated = " +
                             ",".join(list(annotated_feats)))

        # Print out what features are missing
        if self.verbose and self.verbosity > 1 and len(feat_missing) > 1:
            self.logger.info("Features missing = " +
                             ",".join(list(feat_missing)))

        annotation = Annotation(features=annotated_feats,
                                covered=seq_covered,
                                seq=in_seq,
                                missing=feat_missing,
                                ambig=ambig_map,
                                blocks=mb,
                                method=method,
                                refmissing=refmissing,
                                mapping=mapping,
                                exact_match=exact_matches,
                                annotation=None)
    else:
        mb = None

        # Unmap exon 8: same rollback as above, but only for short inputs
        # where three_prime_UTR was annotated without an exact match.
        if locus in ['HLA-C', 'HLA-A'] and len(in_seq.seq) < 600 \
                and 'exon_8' in exact_matches \
                and 'three_prime_UTR' in annotated_feats\
                and 'three_prime_UTR' not in exact_matches:
            for i in deleted_coords:
                mapping[i] = 1
            coordinates.update(deleted_coords)
            mb = getblocks(coordinates)
            feat_missing.update(added_feat)
            del exact_matches[exact_matches.index('exon_8')]
            del found_feats['exon_8']
            if 'exon_8' in annotated_feats:
                del annotated_feats['exon_8']
            if 'three_prime_UTR' in found_feats:
                del found_feats['three_prime_UTR']
            if 'three_prime_UTR' in annotated_feats:
                del annotated_feats['three_prime_UTR']

        if self.verbose:
            self.logger.info("* No missing blocks after seq_search *")

        # Print out what features were ambig matches
        if self.verbose and self.verbosity > 0 and len(ambig_map) > 1:
            self.logger.info("Features with ambig matches = " +
                             ",".join(list(ambig_map)))

        # Print out what features were exact matches
        if self.verbose and self.verbosity > 0 and len(exact_matches) > 1:
            self.logger.info("Features exact matches = " +
                             ",".join(list(exact_matches)))

        # Print out what features have been annotated
        if self.verbose and self.verbosity > 0 and len(
                annotated_feats) > 1:
            self.logger.info("Features annotated = " +
                             ",".join(list(annotated_feats)))

        # Print out what features are missing
        if self.verbose and self.verbosity > 0 and len(feat_missing) > 1:
            self.logger.info("Features missing = " +
                             ",".join(list(feat_missing)))

        annotation = Annotation(features=annotated_feats,
                                covered=seq_covered,
                                seq=in_seq,
                                missing=feat_missing,
                                ambig=ambig_map,
                                method=method,
                                blocks=mb,
                                mapping=mapping,
                                exact_match=exact_matches,
                                annotation=None)

    return annotation