def test_016_debug(self):
    # Checks that the per-component ``debug`` mapping and the plain
    # ``verbose``/``verbosity`` arguments show up on the verbosity
    # attributes this test reads from BioSeqAnn and its sub-objects.

    # Case 1: a level is supplied for every component.
    annotator = BioSeqAnn(
        debug=dict(seqann=5, align=1, seq_search=3, refdata=2))
    self.assertTrue(annotator.debug)
    self.assertEqual(annotator.verbosity, 5)
    self.assertEqual(annotator.align_verbosity, 1)
    self.assertEqual(annotator.seqsearch.verbosity, 3)
    self.assertEqual(annotator.refdata.verbosity, 2)

    # Case 2: components missing from the mapping are expected to report 0.
    annotator = BioSeqAnn(debug=dict(seqann=2, seq_search=5))
    self.assertTrue(annotator.debug)
    self.assertEqual(annotator.verbosity, 2)
    self.assertEqual(annotator.align_verbosity, 0)
    self.assertEqual(annotator.seqsearch.verbosity, 5)
    self.assertEqual(annotator.refdata.verbosity, 0)

    # Case 3: the "gfe" key switches on the GFE helper's verbose output.
    annotator = BioSeqAnn(debug=dict(gfe=2, seq_search=5))
    self.assertTrue(annotator.debug)
    self.assertTrue(annotator.gfe.verbose)
    self.assertEqual(annotator.gfe.verbosity, 2)
    self.assertEqual(annotator.seqsearch.verbosity, 5)
    self.assertEqual(annotator.refdata.verbosity, 0)

    # Case 4: without ``debug`` one verbosity value is expected everywhere.
    annotator = BioSeqAnn(verbose=True, verbosity=3)
    self.assertFalse(annotator.debug)
    self.assertEqual(annotator.verbosity, 3)
    self.assertEqual(annotator.align_verbosity, 3)
    self.assertEqual(annotator.seqsearch.verbosity, 3)
    self.assertEqual(annotator.refdata.verbosity, 3)
def test_012_exact(self):
    # Each expected "exact" entry must be annotated with the "match"
    # method, carry the expected GFE, and reproduce the feature sequences
    # of the reference allele held in ``refdata.hlaref``.
    seqann = BioSeqAnn(verbose=False, verbosity=verbosity, pid="007_exact")
    input_seq = self.data_dir + '/exact_seqs.fasta'
    # Parse the FASTA file once instead of once per expected entry.
    records = list(SeqIO.parse(input_seq, "fasta"))
    for ex in self.expected['exact']:
        in_seq = records[int(ex['index'])]
        locus = ex['locus']
        allele = ex['name']

        annotation = seqann.annotate(in_seq, locus)
        self.assertTrue(annotation.exact)
        self.assertIsNone(annotation.features)
        self.assertEqual(annotation.method, "match")
        self.assertIsInstance(annotation, Annotation)
        self.assertTrue(annotation.complete_annotation)
        self.assertGreater(len(annotation.annotation.keys()), 1)

        expected = seqann.refdata.hlaref[allele]
        expected_seqs = get_features(expected)
        self.assertGreater(len(annotation.structure), 1)
        for feat in annotation.structure:
            self.assertIsInstance(feat, Feature)
        self.assertEqual(annotation.gfe, ex['gfe'])
        self.assertGreater(len(expected_seqs.keys()), 1)

        for feat in expected_seqs:
            if feat not in annotation.annotation:
                # Deliberate failure: reports the name of the reference
                # feature that is missing from the annotation.
                self.assertEqual(feat, None)
            else:
                self.assertEqual(str(expected_seqs[feat]),
                                 str(annotation.annotation[feat]))
def test_020_skip(self):
    # For a fixed set of reference alleles, annotating the reference
    # record with its own name in ``skip`` must give the same GFE and the
    # same feature sequences as the plain (exact) annotation.
    annotator = BioSeqAnn(verbose=False)
    reference = annotator.refdata
    selected = {
        'HLA-C*07:241', 'HLA-A*01:07', 'HLA-A*01:01:59',
        'HLA-A*01:09:01:01', 'HLA-A*02:545', 'HLA-A*29:13',
        'HLA-A*24:03:02', 'HLA-A*02:544', 'HLA-DQA1*04:01:01:01',
        'HLA-A*01:217', 'HLA-A*01:22N', 'HLA-B*51:42',
        'HLA-C*03:04:05', 'HLA-A*01:01:01:04', 'HLA-B*82:01',
    }
    for allele_name in reference.hlaref:
        if allele_name in selected:
            print(allele_name)
            record = reference.hlaref[allele_name]
            # The locus is the part of the description before the first "*".
            locus = record.description.partition("*")[0]
            plain = annotator.annotate(record, locus=locus)
            skipped = annotator.annotate(record, locus=locus,
                                         skip=[allele_name])

            self.assertTrue(plain.exact)
            self.assertEqual(len(skipped.annotation), len(plain.annotation))
            self.assertEqual(plain.gfe, skipped.gfe)
            self.assertGreater(len(skipped.structure), 1)
            for structure_feat in skipped.structure:
                self.assertIsInstance(structure_feat, Feature)

            for name in plain.annotation:
                self.assertTrue(name in skipped.annotation)
                # The plain annotation value is stringified directly,
                # the skipped one through its ``.seq`` attribute.
                self.assertEqual(str(plain.annotation[name]),
                                 str(skipped.annotation[name].seq))
def test_001_align(self):
    # Compares the concatenated alignment produced by ``annotate`` with
    # the expected alignment, position by position. Positions listed in
    # ``ex['diffs']`` are compared against the recorded differing character.
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Release the connection even when an assertion below fails.
    self.addCleanup(server.close)

    # TODO *** NOT WORKING WITH NO LOCUS ***
    # TODO *** FIX 3290 Alignments ***
    # TODO *** GET ALIGNMENTS WORKING WITH DB SEQS ***
    seqann = BioSeqAnn(server=server, align=True)
    input_seq = self.data_dir + '/align_tests.fasta'
    # Parse the FASTA file once instead of once per expected entry.
    records = list(SeqIO.parse(input_seq, "fasta"))

    for entry in self.expected['align']:
        index = int(entry['index'])
        # The expectations are looked up again by index, so the entry that
        # is checked is the one stored at that position of the list.
        ex = self.expected['align'][index]
        allele = ex['name']

        annotation = seqann.annotate(records[index], "HLA-A")
        align = "".join(annotation.aligned[s] for s in annotation.aligned)
        expected_alignment = ex['alignment']

        # NOTE(review): the final alignment position is never compared
        # (range stops at len - 1); kept as-is, confirm it is intentional.
        for pos in range(len(align) - 1):
            observed = align[pos]
            if str(pos) in ex['diffs']:
                self.assertEqual(observed, ex['diffs'][str(pos)])
            else:
                if observed != expected_alignment[pos]:
                    print("FAILED:", allele, pos, observed,
                          expected_alignment[pos])
                self.assertEqual(observed, expected_alignment[pos])
def gfeNotation_post(sequence, locus, gene):
    """gfeNotation_post

    Annotate a sequence and return its GFE notation with the features
    that make it up.

    :param sequence: Mapping whose ``'sequence'`` entry holds the sequence
    :param locus: Locus passed to ``GFE.get_gfe`` with the annotation
    :param gene: Truthy to annotate with ``BioSeqAnn(kir=True)``

    :rtype: dict with keys ``'gfe'`` and ``'feature'`` on success,
        otherwise an ``(Error, 404)`` tuple carrying the captured log
    """
    kir = gene
    sequence = SeqRecord(seq=Seq(sequence['sequence']))

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Capture everything logged during this request in memory so it can
    # be returned to the caller when something goes wrong.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        gfe = GFE()
        seqann = BioSeqAnn(kir=True) if kir else BioSeqAnn()

        try:
            annotation = seqann.annotate(sequence)
        except Exception as e:
            print(e)
            log_contents = log_capture_string.getvalue()
            return Error("An error occured during the annotation",
                         log=log_contents.split("\n")), 404

        try:
            res_feature, res_gfe = gfe.get_gfe(annotation, locus)
        except Exception as e:
            print(e)
            log_contents = log_capture_string.getvalue()
            return Error("An error occurred in getting the gfe of annotation",
                         log=log_contents.split("\n")), 404

        feats = [
            Feature(accession=f.accession,
                    rank=f.rank,
                    term=f.term,
                    sequence=f.sequence)
            for f in res_feature
        ]
        return {'gfe': res_gfe, 'feature': feats}
    finally:
        # Detach the per-request handler; otherwise every call would leave
        # another handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def test_005_insertionserv(self):
    # Server-backed insertion test: each sequence must be annotated with
    # "nt_search", and only the features listed in ``ex['diff']`` may
    # differ from the reference, by the length given in ``ex['lengths']``.
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Release the connection even when an assertion below fails.
    self.addCleanup(server.close)

    seqann = BioSeqAnn(server=server, verbose=False,
                       verbosity=verbosity, pid="004_insertion")
    input_seq = self.data_dir + '/insertion_seqs.fasta'
    # Parse the FASTA file once instead of once per expected entry.
    records = list(SeqIO.parse(input_seq, "fasta"))

    for ex in self.expected['insertion']:
        in_seq = records[int(ex['index'])]
        locus = ex['locus']
        allele = ex['name']
        _, loc = locus.split("-")

        ann = seqann.annotate(in_seq, locus)
        self.assertEqual(ann.method, "nt_search")
        self.assertFalse(ann.missing)
        self.assertFalse(ann.blocks)
        self.assertIsInstance(ann, Annotation)
        self.assertTrue(ann.complete_annotation)
        self.assertGreater(len(ann.annotation.keys()), 1)

        # Reference record comes from the "<dbversion>_<loc>" sub-database.
        db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
        expected = db.lookup(name=allele)
        self.assertEqual(ann.gfe, ex['gfe'])
        self.assertGreater(len(ann.structure), 1)
        for feat in ann.structure:
            self.assertIsInstance(feat, Feature)

        n_diffs = 0
        expected_seqs = get_features(expected)
        self.assertGreater(len(expected_seqs.keys()), 1)
        for feat in expected_seqs:
            if feat not in ann.annotation:
                # Deliberate failure naming the missing reference feature.
                self.assertEqual(feat, None)
            elif feat in ex['diff']:
                n_diffs += 1
                reference_seq = str(expected_seqs[feat])
                annotated_seq = str(ann.annotation[feat].seq)
                self.assertNotEqual(reference_seq, annotated_seq)
                diff_len = len(annotated_seq) - len(reference_seq)
                self.assertEqual(diff_len, ex['lengths'][feat])
            else:
                self.assertEqual(str(expected_seqs[feat]),
                                 str(ann.annotation[feat].seq))
        self.assertEqual(n_diffs, len(ex['diff']))
def test_009_partialambigserv(self):
    # Server-backed test for partial/ambiguous sequences: the annotation
    # must be complete, use the expected method, leave out the features
    # listed as missing, and match the reference except where
    # ``ex['diff']`` says otherwise.
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Release the connection even when an assertion below fails.
    self.addCleanup(server.close)

    seqann = BioSeqAnn(server=server, verbose=False,
                       verbosity=verbosity, pid="006_partialambig")
    input_seq = self.data_dir + '/partial_ambig.fasta'
    # Parse the FASTA file once instead of once per expected entry.
    records = list(SeqIO.parse(input_seq, "fasta"))

    for ex in self.expected['partial_ambig']:
        i = int(ex['index'])
        locus = ex['locus']
        allele = ex['name']
        _, loc = locus.split("-")
        print(str(i), allele)

        ann = seqann.annotate(records[i], locus)
        self.assertTrue(ann.complete_annotation)
        self.assertEqual(ann.method, ex['method'])
        self.assertFalse(ann.blocks)
        self.assertIsInstance(ann, Annotation)
        self.assertGreater(len(ann.annotation.keys()), 1)

        # Reference record comes from the "<dbversion>_<loc>" sub-database.
        db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
        expected = db.lookup(name=allele)
        expected_seqs = get_features(expected)
        self.assertGreater(len(expected_seqs.keys()), 1)
        self.assertEqual(ann.gfe, ex['gfe'])
        self.assertGreater(len(ann.structure), 1)
        for feat in ann.structure:
            self.assertIsInstance(feat, Feature)

        # Make sure only mapped feats exist
        for mf in ex['missing_feats']:
            self.assertFalse(mf in ann.annotation)

        for feat in ex['feats']:
            if feat in ex['diff']:
                self.assertNotEqual(str(expected_seqs[feat]),
                                    str(ann.annotation[feat].seq))
            else:
                self.assertEqual(str(expected_seqs[feat]),
                                 str(ann.annotation[feat].seq))
def test_004_loader3(self):
    # Builds pyGFE with the gfe2hla / seq2hla / gfe2feat loaders enabled,
    # types the first known HLA-A sequence against 3.31.0 and prints how
    # long the whole run took.
    start = time.time()
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=3307)
    try:
        seqann = BioSeqAnn(server=server, verbose=True)
        pygfe = pyGFE(graph=graph,
                      seqann=seqann,
                      verbose=False,
                      load_features=False,
                      load_gfe2hla=True,
                      load_seq2hla=True,
                      load_gfe2feat=True,
                      loci=["HLA-A"])
        self.assertIsInstance(pygfe, pyGFE)
        seqs = list(SeqIO.parse(self.data_dir + "/known_A.fasta", "fasta"))
        typing2 = pygfe.type_from_seq("HLA-A", str(seqs[0].seq), "3.31.0")
    finally:
        # Close the connection even if loading or typing raised.
        server.close()

    end = time.time()
    time_taken = end - start
    print("TIME TAKEN: " + str(time_taken))

    self.assertEqual(typing2.hla, 'HLA-A*01:01:01:01')
    self.assertEqual(typing2.status, "documented")
    self.assertIsInstance(typing2, Typing)
def test_001_load_features(self):
    # With ``load_features=True`` and loci restricted to HLA-A, the GFE
    # helper must expose structures and features for HLA-A only.
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=3307)
    # The connection used to be left open; close it when the test ends,
    # whether it passes or fails.
    self.addCleanup(server.close)

    seqann = BioSeqAnn(server=server)
    pygfe = pyGFE(graph=graph,
                  seqann=seqann,
                  verbose=True,
                  load_features=True,
                  verbosity=2,
                  loci=["HLA-A"])
    self.assertIsInstance(pygfe, pyGFE)
    self.assertGreater(len(pygfe.gfe.structures), 1)
    self.assertGreater(len(pygfe.gfe.all_feats), 1)
    self.assertIn('HLA-A', pygfe.gfe.structures)
    self.assertNotIn('HLA-Z', pygfe.gfe.structures)
def test_005_A(self):
    # Types the second sequence of A_fail.fasta as HLA-DQB1 against
    # 3.20.0, using lookup tables pre-computed into pickle files, and
    # checks that a Typing object comes back.
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=3307)
    # The connection used to be left open; close it when the test ends,
    # whether it passes or fails.
    self.addCleanup(server.close)

    seqann = BioSeqAnn(server=server, dbversion="3200", verbose=True)

    # The pickle files are read from the current working directory.
    # NOTE: unpickling executes code stored in the file, so only load
    # fixtures that were generated locally.
    with open("gfe2feat.pickle", 'rb') as handle:
        gfe_feats = pickle.load(handle)
    with open("unique_db-feats.pickle", 'rb') as handle:
        feats = pickle.load(handle)
    with open("feature-service.pickle", 'rb') as handle:
        cached_feats = pickle.load(handle)
    with open("gfe2hla.pickle", 'rb') as handle:
        gfe2hla = pickle.load(handle)
    with open("seq2hla.pickle", 'rb') as handle:
        seq2hla = pickle.load(handle)

    pygfe = pyGFE(graph=graph,
                  seqann=seqann,
                  load_features=False,
                  verbose=True,
                  features=feats,
                  seq2hla=seq2hla,
                  gfe2hla=gfe2hla,
                  gfe_feats=gfe_feats,
                  cached_features=cached_feats,
                  loci=["HLA-DQB1"])
    self.assertIsInstance(pygfe, pyGFE)

    seqs = list(SeqIO.parse(self.data_dir + "/A_fail.fasta", "fasta"))
    typing1 = pygfe.type_from_seq("HLA-DQB1", str(seqs[1].seq), "3.20.0")
    print(typing1)
    self.assertIsInstance(typing1, Typing)
def test_000_pygfe(self):
    # Types the second sequence of unknown_A.fasta and expects a "novel"
    # typing whose HLA name is HLA-A*01:01:01:01.
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb)
    # The connection used to be left open; close it when the test ends,
    # whether it passes or fails.
    self.addCleanup(server.close)

    seqann = BioSeqAnn(server=server, verbose=False)
    # NOTE(review): the annotator built above is immediately replaced by
    # the placeholder string "X", which is what pyGFE receives. This looks
    # like debugging residue; confirm whether the override should go.
    seqann = "X"

    pygfe = pyGFE(graph=graph,
                  seqann=seqann,
                  load_features=False,
                  verbose=False,
                  load_all=True,
                  loci=["HLA-A"])
    self.assertIsInstance(pygfe, pyGFE)

    seqs = list(SeqIO.parse(self.data_dir + "/unknown_A.fasta", "fasta"))
    typing = pygfe.type_from_seq("HLA-A", str(seqs[1].seq))
    self.assertEqual(typing.hla, 'HLA-A*01:01:01:01')
    self.assertEqual(typing.status, "novel")
    self.assertIsInstance(typing, Typing)
def test_021_stringseq(self):
    # ``annotate`` must accept the sequence as a plain ``str`` and as the
    # record's ``.seq`` object, and give the same exact-match annotation
    # for both.
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Release the connection even when an assertion below fails.
    self.addCleanup(server.close)

    seqann = BioSeqAnn(server=server, verbose=False,
                       verbosity=verbosity, pid="015_stringseq")
    input_seq = self.data_dir + '/exact_seqs.fasta'
    ex = self.expected['exact'][0]
    locus = ex['locus']
    allele = ex['name']
    _, loc = locus.split("-")

    in_seqrec = list(SeqIO.parse(input_seq, "fasta"))[0]
    in_str = str(in_seqrec.seq)
    in_seq = in_seqrec.seq
    ann_str = seqann.annotate(in_str, locus)
    ann_seq = seqann.annotate(in_seq, locus)

    # Both annotations share one reference allele, so fetch it once.
    db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
    expected = db.lookup(name=allele)
    expected_seqs = get_features(expected)
    self.assertGreater(len(expected_seqs.keys()), 1)

    for annotation in [ann_str, ann_seq]:
        self.assertTrue(annotation.exact)
        self.assertIsNone(annotation.features)
        self.assertEqual(annotation.method, "match")
        self.assertIsInstance(annotation, Annotation)
        self.assertTrue(annotation.complete_annotation)
        self.assertGreater(len(annotation.annotation.keys()), 1)
        self.assertEqual(annotation.gfe, ex['gfe'])
        self.assertGreater(len(annotation.structure), 1)
        for feat in annotation.structure:
            self.assertIsInstance(feat, Feature)
        for feat in expected_seqs:
            if feat not in annotation.annotation:
                # Deliberate failure naming the missing reference feature.
                self.assertEqual(feat, None)
            else:
                self.assertEqual(str(expected_seqs[feat]),
                                 str(annotation.annotation[feat]))
def test_005_picklefiles(self):
    # Not a real assertion-based test: it queries the graph and writes
    # four lookup tables (feature accessions, gfe2feat, gfe2hla, seq2hla)
    # to pickle files in the current working directory.
    graph = Graph("http://ec2-34-207-175-160.compute-1.amazonaws.com:80",
                  user=neo4juser, password=neo4jpass, bolt=False)
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # The connection used to be left open; close it when the test ends,
    # whether it passes or fails.
    self.addCleanup(server.close)

    seqann = BioSeqAnn(server=server, verbose=False)
    gfe = GFE()

    #cached_feats = gfe.all_feats
    # print("Finished loading cached_feats")
    # pickle_service = "feature-service.pickle"
    # with open(pickle_service, 'wb') as handle2:
    #     pickle.dump(cached_feats, handle2, protocol=pickle.HIGHEST_PROTOCOL)

    # Key every feature by "DB:LOC:RANK:TERM:SEQ" and map it to its
    # accession.
    feat_df = pd.DataFrame(graph.data(all_feats()))
    feat_df['ID'] = feat_df.apply(
        lambda row: ":".join([row['DB'], row['LOC'], str(row['RANK']),
                              row['TERM'], row['SEQ']]),
        axis=1)
    feats = feat_df[['ID', 'ACCESSION']].set_index('ID').to_dict()['ACCESSION']
    print("Finished loading feats")

    with open("unique_db-feats.pickle", 'wb') as handle:
        pickle.dump(feats, handle, protocol=pickle.HIGHEST_PROTOCOL)

    gfedb = GfeDB(graph=graph, persist=False, verbose=False)
    act = ACT(gfedb=gfedb,
              seqann=seqann,
              load_gfe2hla=True,
              load_gfe2feat=True,
              load_seq2hla=True,
              gfe=gfe)
    print("Finished loading all!!")

    tables = (
        ("gfe2feat.pickle", act.gfe_feats),
        ("gfe2hla.pickle", act.gfe2hla),
        ("seq2hla.pickle", act.seq2hla),
    )
    for filename, table in tables:
        with open(filename, 'wb') as handle:
            pickle.dump(table, handle, protocol=pickle.HIGHEST_PROTOCOL)
def test_015_fail(self):
    # With a server attached, the in-memory reference tables must be
    # empty, and annotating the first failing sequence without a locus
    # must give a falsy result.
    input_seq = self.data_dir + '/failed_seqs.fasta'
    in_seq = list(SeqIO.parse(input_seq, "fasta"))[0]
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Release the connection even when an assertion below fails.
    self.addCleanup(server.close)

    seqann = BioSeqAnn(server=server, verbose=False,
                       verbosity=verbosity, pid="011_fail")
    self.assertFalse(seqann.refdata.seqref)
    self.assertFalse(seqann.refdata.hlaref)
    annotation = seqann.annotate(in_seq)
    self.assertFalse(annotation)
def test_013_nomatch(self):
    # The first sequence of nomatch_seqs.fasta, annotated as HLA-A, must
    # still produce a complete annotation.
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Release the connection even when an assertion below fails.
    self.addCleanup(server.close)

    seqann = BioSeqAnn(server=server, verbose=False,
                       verbosity=verbosity, pid="009_nomatch")
    self.assertIsInstance(seqann, BioSeqAnn)

    input_seq = self.data_dir + '/nomatch_seqs.fasta'
    in_seq = list(SeqIO.parse(input_seq, "fasta"))[0]
    annotation = seqann.annotate(in_seq, "HLA-A")
    self.assertIsInstance(annotation, Annotation)
    self.assertGreater(len(annotation.annotation.keys()), 1)
    self.assertTrue(annotation.complete_annotation)
def test_002_noserver(self):
    # Without a server, BioSeqAnn must report ``server_avail`` as false
    # and hold non-empty in-memory reference tables.
    annotator = BioSeqAnn(verbose=False, verbosity=verbosity,
                          pid="002_noserver")
    self.assertIsInstance(annotator, BioSeqAnn)
    self.assertIsInstance(annotator.refdata, ReferenceData)

    hla_name_count = len(annotator.refdata.hla_names)
    self.assertGreater(hla_name_count, 10)

    hla_a_max = annotator.refdata.structure_max['HLA-A']
    self.assertEqual(hla_a_max, 17)

    self.assertFalse(annotator.refdata.server_avail)
    self.assertGreater(len(annotator.refdata.seqref), 0)
    self.assertGreater(len(annotator.refdata.hlaref), 0)
def test_006_insertion(self):
    # Serverless insertion test: each sequence must be annotated with
    # "nt_search", and only the features listed in ``ex['diff']`` may
    # differ from the reference, by the length given in ``ex['lengths']``.
    seqann = BioSeqAnn(verbosity=verbosity, pid="004_insertion")
    input_seq = self.data_dir + '/insertion_seqs.fasta'
    # Parse the FASTA file once instead of once per expected entry.
    records = list(SeqIO.parse(input_seq, "fasta"))

    for ex in self.expected['insertion']:
        in_seq = records[int(ex['index'])]
        locus = ex['locus']
        allele = ex['name']

        ann = seqann.annotate(in_seq, locus)
        self.assertEqual(ann.method, "nt_search")
        self.assertFalse(ann.missing)
        self.assertFalse(ann.blocks)
        self.assertIsInstance(ann, Annotation)
        self.assertTrue(ann.complete_annotation)
        self.assertGreater(len(ann.annotation.keys()), 1)

        expected = seqann.refdata.hlaref[allele]
        self.assertEqual(ann.gfe, ex['gfe'])
        self.assertGreater(len(ann.structure), 1)
        for feat in ann.structure:
            self.assertIsInstance(feat, Feature)

        n_diffs = 0
        expected_seqs = get_features(expected)
        self.assertGreater(len(expected_seqs.keys()), 1)
        for feat in expected_seqs:
            if feat not in ann.annotation:
                # Deliberate failure naming the missing reference feature.
                self.assertEqual(feat, None)
            elif feat in ex['diff']:
                n_diffs += 1
                reference_seq = str(expected_seqs[feat])
                annotated_seq = str(ann.annotation[feat].seq)
                self.assertNotEqual(reference_seq, annotated_seq)
                diff_len = len(annotated_seq) - len(reference_seq)
                self.assertEqual(diff_len, ex['lengths'][feat])
            else:
                self.assertEqual(str(expected_seqs[feat]),
                                 str(ann.annotation[feat].seq))
        self.assertEqual(n_diffs, len(ex['diff']))
def test_017_logging(self):
    # Annotating a sequence whose locus cannot be determined must return
    # a falsy result, and the last captured log record must be the
    # corresponding ERROR message.
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Release the connection even when an assertion below fails.
    self.addCleanup(server.close)

    with self.assertLogs(level='INFO') as cm:
        seqann = BioSeqAnn(server=server, verbose=True)
        input_seq = self.data_dir + '/failed_seqs.fasta'
        in_seq = list(SeqIO.parse(input_seq, "fasta"))[0]
        annotation = seqann.annotate(in_seq)
        self.assertFalse(annotation)

    self.assertGreater(len(cm.output), 1)
    last_record = cm.output[-1]
    # The level is the text before the first ":"; the message is taken
    # as the text between the first and second "-".
    error = last_record.split(":")[0]
    error_msg = last_record.split("-")[1]
    self.assertEqual(error, "ERROR")
    self.assertEqual(error_msg, " Locus could not be determined!")
def test_010_partialambig(self):
    # Serverless test for partial/ambiguous sequences: the annotation
    # must be complete, use the expected method, leave out the features
    # listed as missing, and match the reference except where
    # ``ex['diff']`` says otherwise.
    seqann = BioSeqAnn(verbose=False, verbosity=verbosity,
                       pid="006_partialambig")
    input_seq = self.data_dir + '/partial_ambig.fasta'
    # Parse the FASTA file once instead of once per expected entry.
    records = list(SeqIO.parse(input_seq, "fasta"))

    for ex in self.expected['partial_ambig']:
        in_seq = records[int(ex['index'])]
        locus = ex['locus']
        allele = ex['name']

        ann = seqann.annotate(in_seq, locus)
        self.assertTrue(ann.complete_annotation)
        self.assertEqual(ann.method, ex['method'])
        self.assertFalse(ann.blocks)
        self.assertIsInstance(ann, Annotation)
        self.assertGreater(len(ann.annotation.keys()), 1)

        expected = seqann.refdata.hlaref[allele]
        expected_seqs = get_features(expected)
        self.assertGreater(len(expected_seqs.keys()), 1)
        self.assertEqual(ann.gfe, ex['gfe'])
        self.assertGreater(len(ann.structure), 1)
        for feat in ann.structure:
            self.assertIsInstance(feat, Feature)

        # Make sure only mapped feats exist
        for mf in ex['missing_feats']:
            self.assertFalse(mf in ann.annotation)

        for feat in ex['feats']:
            if feat in ex['diff']:
                self.assertNotEqual(str(expected_seqs[feat]),
                                    str(ann.annotation[feat].seq))
            else:
                self.assertEqual(str(expected_seqs[feat]),
                                 str(ann.annotation[feat].seq))
def test_018_nogfe(self):
    # The second failing sequence must yield an annotation without GFE
    # or structure, and the first captured log record must be the
    # non-DNA alphabet WARNING.
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Release the connection even when an assertion below fails.
    self.addCleanup(server.close)

    with self.assertLogs(level='INFO') as cm:
        seqann = BioSeqAnn(server=server, verbose=True)
        input_seq = self.data_dir + '/failed_seqs.fasta'
        in_seq = list(SeqIO.parse(input_seq, "fasta"))[1]
        annotation = seqann.annotate(in_seq)
        self.assertFalse(annotation.gfe)
        self.assertFalse(annotation.structure)
        self.assertTrue(annotation.annotation)

    self.assertGreater(len(cm.output), 2)
    first_record = cm.output[0]
    # The level is the text before the first ":"; the message is taken
    # as the text between the first and second "-".
    error = first_record.split(":")[0]
    error_msg = first_record.split("-")[1]
    self.assertEqual(error, "WARNING")
    self.assertEqual(error_msg, " Sequence alphabet contains non DNA")
def test_001_pygfe(self):
    # Serverless variant: every lookup table comes from a pickle file in
    # the working directory. The second sequence of unknown_A.fasta is
    # expected to type as a "novel" HLA-A*01:01:01:01.
    def _unpickle(path):
        # Read one pickled object from ``path``.
        with open(path, 'rb') as stream:
            return pickle.load(stream)

    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)

    gfe_feats = _unpickle("gfe2feat.pickle")
    feats = _unpickle("unique_db-feats.pickle")
    cached_feats = _unpickle("feature-service.pickle")
    gfe2hla = _unpickle("gfe2hla.pickle")
    seq2hla = _unpickle("seq2hla.pickle")

    annotator = BioSeqAnn(verbose=False,
                          cached_features=cached_feats,
                          align=True)
    pygfe = pyGFE(graph=graph,
                  seqann=annotator,
                  gfe_feats=gfe_feats,
                  gfe2hla=gfe2hla,
                  seq2hla=seq2hla,
                  features=feats,
                  verbose=False)
    self.assertIsInstance(pygfe, pyGFE)

    fasta_path = self.data_dir + "/unknown_A.fasta"
    records = list(SeqIO.parse(fasta_path, "fasta"))
    typing = pygfe.type_from_seq("HLA-A", str(records[1].seq))
    print(typing)
    self.assertEqual(typing.hla, 'HLA-A*01:01:01:01')
    self.assertEqual(typing.status, "novel")
    self.assertIsInstance(typing, Typing)
def test_001_seqann(self):
    # With a server attached, BioSeqAnn must expose ReferenceData that
    # reports the server as available.
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Release the connection even when an assertion below fails.
    self.addCleanup(server.close)

    seqann = BioSeqAnn(server=server, verbose=False,
                       verbosity=verbosity, pid="001_seqann")
    self.assertIsInstance(seqann, BioSeqAnn)
    self.assertIsInstance(seqann.refdata, ReferenceData)
    self.assertGreater(len(seqann.refdata.hla_names), 10)
    self.assertEqual(seqann.refdata.structure_max['HLA-A'], 17)
    self.assertTrue(seqann.refdata.server_avail)
def annotate_get(sequence, locus=None, imgthla_version="3.31.0"):  # noqa: E501
    """annotate_get

    Annotate a consensus sequence and return its features and GFE.  # noqa: E501

    :param sequence: Valid consensus sequence
    :type sequence: str
    :param locus: Valid locus
    :type locus: str
    :param imgthla_version: IMGT/HLA DB version, dotted ("3.31.0") or
        compact ("3310")
    :type imgthla_version: str

    :rtype: Typing on success, otherwise an ``(Error, 404)`` tuple
        carrying the captured log
    """
    global seqanns
    typing = Typing()
    sequence = SeqRecord(seq=Seq(sequence))

    # Expand a compact version such as "3310" to "3.31.0". The previous
    # check, ``re.match(".", ...)``, treated the dot as "any character"
    # and therefore never triggered for non-empty input.
    if "." not in imgthla_version and len(imgthla_version) >= 4:
        imgthla_version = ".".join([
            imgthla_version[0],
            imgthla_version[1:3],
            imgthla_version[3],
        ])
    db = "".join(imgthla_version.split("."))

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Capture everything logged during this request in memory so it can
    # be returned to the caller when something goes wrong.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        # One annotator is cached per database version.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True,
                               safemode=True,
                               dbversion=db,
                               verbosity=3)
            seqanns.update({db: seqann})

        try:
            annotation = seqann.annotate(sequence, locus)
        except Exception:
            log_contents = log_capture_string.getvalue()
            return Error("An error occured during the annotation",
                         log=log_contents.split("\n")), 404

        if not annotation:
            log_contents = log_capture_string.getvalue()
            return Error("No annotation could be produced",
                         log=log_contents.split("\n")), 404

        if not hasattr(annotation, 'structure'):
            log_contents = log_capture_string.getvalue()
            return Error("No structure was produced",
                         log=log_contents.split("\n")), 404

        typing.features = [
            Feature(accession=f.accession,
                    rank=f.rank,
                    term=f.term,
                    sequence=f.sequence)
            for f in annotation.structure
        ]
        typing.gfe = annotation.gfe
        typing.imgtdb_version = imgthla_version
        return typing
    finally:
        # Detach the per-request handler; otherwise every call would leave
        # another handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def gfecreate_post(locus, sequence, imgt_version, neo4j_url=neo_dict['neo4j_url'], user=neo_dict['user'], password=neo_dict['password']):  # noqa: E501
    """gfecreate_post

    Create the GFE notation for a sequence through ``pyGFE.gfe_create``.

    :param locus: Valid HLA locus
    :param sequence: Mapping whose ``'sequence'`` entry holds the sequence
    :param imgt_version: IMGT/HLA DB version, dotted ("3.31.0") or
        compact ("3310")
    :param neo4j_url: URL of the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph

    :rtype: dict with keys ``'gfe'``, ``'feature'`` and
        ``'annotation_feature'`` on success, otherwise an
        ``(Error, 404)`` tuple carrying the captured log
    """
    imgthla_version = imgt_version
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla
    sequence = sequence['sequence']

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Capture everything logged during this request in memory so it can
    # be returned to the caller when something goes wrong.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s'
        ' - %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        # Expand a compact version such as "3310" to "3.31.0". The
        # previous check, ``re.match(".", ...)``, treated the dot as "any
        # character" and therefore never triggered for non-empty input.
        if "." not in imgthla_version and len(imgthla_version) >= 4:
            imgthla_version = ".".join([
                imgthla_version[0],
                imgthla_version[1:3],
                imgthla_version[3],
            ])
        db = "".join(imgthla_version.split("."))

        # One annotator is cached per database version.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True,
                               safemode=True,
                               dbversion=db,
                               verbosity=3)
            seqanns.update({db: seqann})

        try:
            graph = Graph(neo4j_url,
                          user=user,
                          password=password,
                          bolt=False)
        except ServiceUnavailable as err:
            log_contents = log_capture_string.getvalue()
            log_data = log_contents.split("\n")
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        # Load the lookup tables on first use, then reuse them through
        # the module-level globals.
        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            typing = pygfe.gfe_create(locus=locus,
                                      sequence=sequence,
                                      imgtdb_version=db)
        except Exception as e:
            print(e)
            log_contents = log_capture_string.getvalue()
            return Error("Type with alignment failed",
                         log=log_contents.split("\n")), 404

        if isinstance(typing, Error):
            log_contents = log_capture_string.getvalue()
            typing.log = log_contents.split("\n")
            return typing, 404

        if not typing:
            log_contents = log_capture_string.getvalue()
            return Error("Type with alignment failed",
                         log=log_contents.split("\n")), 404

        structure_feats = [
            Feature(accession=f.accession,
                    rank=f.rank,
                    term=f.term,
                    sequence=f.sequence)
            for f in typing['structure']
        ]
        anno_feats = [
            Feature(accession=f.accession,
                    rank=f.rank,
                    term=f.term,
                    sequence=f.sequence)
            for f in typing['annotation'].structure
        ]
        return {
            'gfe': typing['gfe'],
            'feature': structure_feats,
            'annotation_feature': anno_feats
        }
    finally:
        # Detach the per-request handler; otherwise every call would leave
        # another handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def releases_locus_get(imgt_releases, locus, neo4j_url=neo_dict['neo4j_url'], user=neo_dict['user'], password=neo_dict['password']):
    """releases_locus_get

    Return what ``pyGFE.list_db_by_locus_imgt`` reports for a locus and
    an IMGT release.

    :param imgt_releases: IMGT release version; its dots are stripped to
        form the database key
    :param locus: Locus to list
    :param neo4j_url: URL of the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph

    :rtype: the non-empty listing on success, otherwise an
        ``(Error, 404)`` tuple carrying the captured log
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Capture everything logged during this request in memory so it can
    # be returned to the caller when something goes wrong.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        db = "".join(imgt_releases.split("."))

        # One annotator is cached per database version.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True,
                               safemode=True,
                               dbversion=db,
                               verbosity=3)
            seqanns.update({db: seqann})

        try:
            graph = Graph(neo4j_url,
                          user=user,
                          password=password,
                          bolt=False)
        except ServiceUnavailable as err:
            log_contents = log_capture_string.getvalue()
            log_data = log_contents.split("\n")
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        # Load the lookup tables on first use, then reuse them through
        # the module-level globals.
        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            hla_list = pygfe.list_db_by_locus_imgt(locus, imgt_releases)
        except Exception as e:
            log_contents = log_capture_string.getvalue()
            print("The Error", e)
            return Error("hla list failed",
                         log=log_contents.split("\n")), 404

        if isinstance(hla_list, Error):
            log_contents = log_capture_string.getvalue()
            hla_list.log = log_contents.split("\n")
            return hla_list, 404

        if not hla_list:
            log_contents = log_capture_string.getvalue()
            return Error("no data record found",
                         log=log_contents.split("\n")), 404

        return hla_list
    finally:
        # Detach the per-request handler; otherwise every call would leave
        # another handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def test_006_align(self):
    # With alignment enabled, the first four sequences of
    # align_tests.fasta must all type as HLA-A*02:01:01:12 against 3.31.0.
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=3307)
    # The connection used to be left open; close it when the test ends,
    # whether it passes or fails.
    self.addCleanup(server.close)

    seqann = BioSeqAnn(align=True, server=server,
                       dbversion="3310", verbose=True)

    # The pickle files are read from the current working directory.
    # NOTE: unpickling executes code stored in the file, so only load
    # fixtures that were generated locally.
    with open("gfe2feat.pickle", 'rb') as handle:
        gfe_feats = pickle.load(handle)
    with open("unique_db-feats.pickle", 'rb') as handle:
        feats = pickle.load(handle)
    with open("feature-service.pickle", 'rb') as handle:
        cached_feats = pickle.load(handle)
    with open("gfe2hla.pickle", 'rb') as handle:
        gfe2hla = pickle.load(handle)
    with open("seq2hla.pickle", 'rb') as handle:
        seq2hla = pickle.load(handle)

    pygfe = pyGFE(graph=graph,
                  seqann=seqann,
                  load_features=False,
                  verbose=True,
                  features=feats,
                  seq2hla=seq2hla,
                  gfe2hla=gfe2hla,
                  gfe_feats=gfe_feats,
                  cached_features=cached_feats,
                  loci=["HLA-A"])
    self.assertIsInstance(pygfe, pyGFE)

    seqs = list(SeqIO.parse(self.data_dir + "/align_tests.fasta", "fasta"))
    # Index explicitly so a FASTA with fewer than four records still
    # fails loudly, as it did before.
    for index in range(4):
        typing = pygfe.type_from_seq("HLA-A", str(seqs[index].seq),
                                     "3.31.0")
        self.assertEqual(typing.hla, 'HLA-A*02:01:01:12')
def findkir_get(gfe, neo4j_url=neo_dict['neo4j_url'], user=neo_dict['user'], password=neo_dict['password']):  # noqa: E501
    """findkir_get

    Get all kir associated with a GFE # noqa: E501

    :param gfe: Valid gfe of locus
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph

    :return: the result of ``pygfe.find_gfe_kir`` on success, otherwise a
        ``(Error, 404)`` tuple whose ``log`` holds the log lines captured
        during this call
    :rtype: Typing
    """
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Per-call handler that collects log records into an in-memory buffer.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        """Return everything logged so far during this call, as lines."""
        return log_capture_string.getvalue().split("\n")

    try:
        seqann = BioSeqAnn()

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # Lookup tables are not loaded yet: let pyGFE load them and keep
            # them in the module globals for later calls.
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          load_gfe2hla=True, load_seq2hla=True,
                          load_gfe2feat=True, verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          gfe2hla=gfe2hla, gfe_feats=gfe_feats,
                          seq2hla=seq2hla, verbose=True)

        try:
            typing = pygfe.find_gfe_kir(gfe, pygfe.breakup_gfe(gfe))
        except Exception as err:
            # Log instead of print so the reason is part of the returned log.
            logger.error("find_gfe_kir failed: %s", err)
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        if isinstance(typing, Error):
            typing.log = captured_log()
            return typing, 404

        if not typing:
            return Error("Type with alignment failed",
                         log=captured_log()), 404
        return typing
    finally:
        # Without this every call would leave one more handler (and its
        # buffer) attached to the root logger.
        logger.removeHandler(ch)
def typeseq_get(sequence, locus=None, imgthla_version="3.31.0", neo4j_url="http://neo4j.b12x.org:80", user='******', password='******'):  # noqa: E501
    """typeseq_get

    Get HLA and GFE from consensus sequence or GFE notation # noqa: E501

    :param sequence: Consensus sequence
    :type sequence: str
    :param locus: Valid HLA locus
    :type locus: str
    :param imgthla_version: IMGT/HLA DB Version, dotted ("3.31.0") or
        dot-less ("3310")
    :type imgthla_version: str
    :param neo4j_url: URL for the neo4j graph
    :type neo4j_url: str
    :param user: Username for the neo4j graph
    :type user: str
    :param password: Password for the neo4j graph
    :type password: str

    :return: the typing produced by ``pygfe.type_from_seq`` on success,
        otherwise a ``(Error, 404)`` tuple whose ``log`` holds the log lines
        captured during this call
    :rtype: Typing
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Per-call handler that collects log records into an in-memory buffer.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s - %(funcName)s %(lineno)d: - %(message)s'
    )
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        """Return everything logged so far during this call, as lines."""
        return log_capture_string.getvalue().split("\n")

    try:
        # Expand a dot-less version such as "3310" to "3.31.0". The previous
        # guard was re.match("."), where "." matches any character, so the
        # expansion never ran for real input.
        if "." not in imgthla_version and len(imgthla_version) >= 4:
            imgthla_version = ".".join([
                imgthla_version[0],
                imgthla_version[1:3],
                imgthla_version[3]
            ])

        db = "".join(imgthla_version.split("."))
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True, safemode=True,
                               dbversion=db, verbosity=3)
            seqanns.update({db: seqann})

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # Lookup tables are not loaded yet: let pyGFE load them and keep
            # them in the module globals for later calls.
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          load_gfe2hla=True, load_seq2hla=True,
                          load_gfe2feat=True, verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          gfe2hla=gfe2hla, gfe_feats=gfe_feats,
                          seq2hla=seq2hla, verbose=True)

        try:
            typing = pygfe.type_from_seq(locus, sequence, imgthla_version)
        except Exception as err:
            # Was a bare ``except:`` that also trapped SystemExit and
            # KeyboardInterrupt and dropped the reason for the failure.
            logger.error("type_from_seq failed: %s", err)
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        if isinstance(typing, Error):
            typing.log = captured_log()
            return typing, 404

        if not typing:
            return Error("Type sequence failed", log=captured_log()), 404

        typing.gfedb_version = "2.0.0"
        return typing
    finally:
        # Without this every call would leave one more handler (and its
        # buffer) attached to the root logger.
        logger.removeHandler(ch)
def gfeAnnotation_post(sequence, locus, gene=None, imgtdb_version="3.31.0"):
    """gfeAnnotation_post

    Annotate a sequence and return its features and GFE # noqa: E501

    :param sequence: Mapping whose ``'sequence'`` entry is the raw sequence
    :param locus: Valid Locus
    :param gene: Gene family; only ``'KIR'`` (any case) is treated
        specially, every other value falls back to HLA
    :param imgtdb_version: IMGT DB version, dotted ("3.31.0") or dot-less
        ("3310")

    :return: a Typing with ``features``, ``gfe`` and ``imgtdb_version`` set
        on success, otherwise a ``(Error, 404)`` tuple whose ``log`` holds
        the log lines captured during this call
    :rtype: Typing
    """
    global seqanns

    seq_record = SeqRecord(seq=Seq(sequence['sequence']))

    # Expand a dot-less version such as "3310" to "3.31.0". The previous
    # guard was re.match("."), where "." matches any character, so the
    # expansion never ran for real input.
    if "." not in imgtdb_version and len(imgtdb_version) >= 4:
        imgtdb_version = ".".join([imgtdb_version[0],
                                   imgtdb_version[1:3],
                                   imgtdb_version[3]])
    db = "".join(imgtdb_version.split("."))

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Per-call handler that collects log records into an in-memory buffer.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        """Return everything logged so far during this call, as lines."""
        return log_capture_string.getvalue().split("\n")

    try:
        # TODO: Use `gene` or locus to figure out the gene-family
        # NOTE(review): the cache is keyed by db version only, so a cached
        # annotator is reused whatever `gene` says - confirm this is wanted.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            # Every path assigns `seqann`; the old if/elif/else left it
            # unbound for some values of `gene`.
            if gene and gene.upper() == 'KIR':
                seqann = BioSeqAnn(verbose=True, safemode=True,
                                   dbversion=db, verbosity=3, kir=True)
            else:
                # Defaults to HLA
                seqann = BioSeqAnn(verbose=True, safemode=True,
                                   dbversion=db, verbosity=3)
            seqanns.update({db: seqann})

        try:
            annotation = seqann.annotate(seq_record, locus)
        except Exception as err:
            # Log instead of print so the reason is part of the returned log.
            logger.error("annotate failed: %s", err)
            return Error("An error occurred during the annotation",
                         log=captured_log()), 404

        if not annotation:
            return Error("No annotation could be produced",
                         log=captured_log()), 404

        if not hasattr(annotation, 'structure'):
            return Error("No structure was produced",
                         log=captured_log()), 404

        typing = Typing()
        typing.features = [
            Feature(accession=f.accession, rank=f.rank,
                    term=f.term, sequence=f.sequence)
            for f in annotation.structure
        ]
        typing.gfe = annotation.gfe
        typing.imgtdb_version = imgtdb_version
        return typing
    finally:
        # Without this every call would leave one more handler (and its
        # buffer) attached to the root logger.
        logger.removeHandler(ch)