Esempio n. 1
0
    def test_016_debug(self):
        """Verify that debug/verbosity options reach every component.

        Four configurations are exercised: a ``debug`` dict naming every
        component, two partial ``debug`` dicts (components that are not
        listed are expected to report verbosity 0), and the plain
        ``verbose``/``verbosity`` keywords, which are expected to leave
        ``debug`` falsy while applying one level everywhere.
        """
        # Case 1: every component has its own explicit level.
        all_levels = {"seqann": 5, "align": 1, "seq_search": 3, "refdata": 2}
        annotator = BioSeqAnn(debug=all_levels)
        self.assertTrue(annotator.debug)
        self.assertEqual(annotator.verbosity, 5)
        self.assertEqual(annotator.align_verbosity, 1)
        self.assertEqual(annotator.seqsearch.verbosity, 3)
        self.assertEqual(annotator.refdata.verbosity, 2)

        # Case 2: only two components listed; the others fall back to 0.
        two_levels = {"seqann": 2, "seq_search": 5}
        annotator = BioSeqAnn(debug=two_levels)
        self.assertTrue(annotator.debug)
        self.assertEqual(annotator.verbosity, 2)
        self.assertEqual(annotator.align_verbosity, 0)
        self.assertEqual(annotator.seqsearch.verbosity, 5)
        self.assertEqual(annotator.refdata.verbosity, 0)

        # Case 3: the "gfe" key switches on the GFE component's verbosity.
        gfe_levels = {"gfe": 2, "seq_search": 5}
        annotator = BioSeqAnn(debug=gfe_levels)
        self.assertTrue(annotator.debug)
        self.assertTrue(annotator.gfe.verbose)
        self.assertEqual(annotator.gfe.verbosity, 2)
        self.assertEqual(annotator.seqsearch.verbosity, 5)
        self.assertEqual(annotator.refdata.verbosity, 0)

        # Case 4: no debug dict, a single verbosity shared by all components.
        annotator = BioSeqAnn(verbose=True, verbosity=3)
        self.assertFalse(annotator.debug)
        for level in (annotator.verbosity,
                      annotator.align_verbosity,
                      annotator.seqsearch.verbosity,
                      annotator.refdata.verbosity):
            self.assertEqual(level, 3)
Esempio n. 2
0
 def test_012_exact(self):
     """Annotate exact-match sequences using the bundled reference data.

     For every entry in ``self.expected['exact']`` the matching record of
     ``exact_seqs.fasta`` is annotated and the result is compared, feature
     by feature, with the reference record in ``seqann.refdata.hlaref``.
     """
     seqann = BioSeqAnn(verbose=False, verbosity=verbosity, pid="007_exact")
     input_seq = self.data_dir + '/exact_seqs.fasta'
     # Parse the FASTA file once up front rather than once per entry.
     in_seqs = list(SeqIO.parse(input_seq, "fasta"))
     for ex in self.expected['exact']:
         locus = ex['locus']
         allele = ex['name']
         in_seq = in_seqs[int(ex['index'])]
         annotation = seqann.annotate(in_seq, locus)
         self.assertTrue(annotation.exact)
         self.assertIsNone(annotation.features)
         self.assertEqual(annotation.method, "match")
         self.assertIsInstance(annotation, Annotation)
         self.assertTrue(annotation.complete_annotation)
         self.assertGreater(len(annotation.annotation.keys()), 1)
         expected = seqann.refdata.hlaref[allele]
         expected_seqs = get_features(expected)
         self.assertGreater(len(annotation.structure), 1)
         for feat in annotation.structure:
             self.assertIsInstance(feat, Feature)
         self.assertEqual(annotation.gfe, ex['gfe'])
         self.assertGreater(len(expected_seqs.keys()), 1)
         for feat in expected_seqs:
             # Every reference feature must be annotated and identical.
             self.assertIn(feat, annotation.annotation)
             self.assertEqual(str(expected_seqs[feat]),
                              str(annotation.annotation[feat]))
Esempio n. 3
0
    def test_020_skip(self):
        """Compare a normal annotation with one that skips the allele itself.

        Each selected reference record is annotated twice: once as-is
        (expected to be an exact hit) and once with its own name passed in
        ``skip``. Both runs must give the same GFE, the same number of
        features and identical feature sequences.
        """
        seqann = BioSeqAnn(verbose=False)
        refdata = seqann.refdata
        # Alleles exercised by this test; all other records are ignored.
        selected = {
            'HLA-C*07:241', 'HLA-A*01:07', 'HLA-A*01:01:59',
            'HLA-A*01:09:01:01', 'HLA-A*02:545', 'HLA-A*29:13',
            'HLA-A*24:03:02', 'HLA-A*02:544', 'HLA-DQA1*04:01:01:01',
            'HLA-A*01:217', 'HLA-A*01:22N', 'HLA-B*51:42', 'HLA-C*03:04:05',
            'HLA-A*01:01:01:04', 'HLA-B*82:01',
        }

        for seqname in refdata.hlaref:
            if seqname in selected:
                print(seqname)
                record = refdata.hlaref[seqname]
                # The locus is whatever precedes the first "*" in the
                # record description.
                locus = record.description.split("*")[0]
                plain = seqann.annotate(record, locus=locus)
                skipped = seqann.annotate(record, locus=locus,
                                          skip=[seqname])
                self.assertTrue(plain.exact)
                self.assertEqual(len(skipped.annotation),
                                 len(plain.annotation))
                self.assertEqual(plain.gfe, skipped.gfe)
                self.assertGreater(len(skipped.structure), 1)
                for item in skipped.structure:
                    self.assertIsInstance(item, Feature)
                for key in plain.annotation:
                    self.assertTrue(key in skipped.annotation)
                    plain_seq = str(plain.annotation[key])
                    skipped_seq = str(skipped.annotation[key].seq)
                    self.assertEqual(plain_seq, skipped_seq)
Esempio n. 4
0
    def test_001_align(self):
        """Check the alignment produced for each expected alignment case.

        Every sequence of ``align_tests.fasta`` referenced from
        ``self.expected['align']`` is annotated against ``HLA-A`` with
        alignment enabled; the concatenated alignment is then compared
        column by column with the expected alignment, except at positions
        listed in ``diffs``, where the listed character is expected.
        """
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # Release the connection even if an assertion below fails.
        self.addCleanup(server.close)

        # TODO *** NOT WORKING WITH NO LOCUS           ***
        # TODO *** FIX 3290 Alignments                 ***
        # TODO *** GET ALIGNMENTS WORKING WITH DB SEQS ***
        seqann = BioSeqAnn(server=server, align=True)
        input_seq = self.data_dir + '/align_tests.fasta'
        # Parse the FASTA file once up front rather than once per entry.
        in_seqs = list(SeqIO.parse(input_seq, "fasta"))
        for entry in self.expected['align']:
            idx = int(entry['index'])
            # The expected record is looked up again by its declared index.
            ex = self.expected['align'][idx]
            allele = ex['name']
            annotation = seqann.annotate(in_seqs[idx], "HLA-A")
            align = "".join(
                [annotation.aligned[s] for s in annotation.aligned])
            expected_align = list(ex['alignment'])
            # NOTE(review): the range stops at len(align) - 1, so the final
            # alignment column is never compared -- confirm this is intended.
            for pos in range(0, len(align) - 1):
                if str(pos) in ex['diffs']:
                    self.assertEqual(align[pos], ex['diffs'][str(pos)])
                else:
                    if align[pos] != expected_align[pos]:
                        print("FAILED:", allele, pos,
                              align[pos],
                              expected_align[pos])
                    self.assertEqual(align[pos], expected_align[pos])
Esempio n. 5
0
def gfeNotation_post(sequence, locus, gene):
    """
    gfeNotation_post
        GFE notations associated with the sequence

        :param sequence: Mapping whose ``'sequence'`` entry holds the
            sequence to annotate
        :param locus: Valid HLA locus, passed on to ``GFE.get_gfe``
        :param gene : Kir true or false
        :return: ``{'gfe': ..., 'feature': [...]}`` on success, otherwise
            an ``(Error, 404)`` tuple carrying the captured log lines
        :rtype: Feature and gfe
    """
    kir = gene
    sequence = SeqRecord(seq=Seq(sequence['sequence']))
    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Capture INFO-and-above records in memory so that they can be
    # returned to the caller when annotation fails.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        gfe = GFE()
        seqann = BioSeqAnn(kir=True) if kir else BioSeqAnn()

        try:
            annotation = seqann.annotate(sequence)
        except Exception as e:
            print(e)
            log_contents = log_capture_string.getvalue()
            return Error("An error occured during the annotation",
                         log=log_contents.split("\n")), 404
        try:
            res_feature, res_gfe = gfe.get_gfe(annotation, locus)
        except Exception as e:
            print(e)
            log_contents = log_capture_string.getvalue()
            return Error("An error occurred in getting the gfe of annotation",
                         log=log_contents.split("\n")), 404
        feats = [Feature(accession=f.accession,
                         rank=f.rank,
                         term=f.term,
                         sequence=f.sequence)
                 for f in res_feature]
        return {'gfe': res_gfe, 'feature': feats}
    finally:
        # Detach the per-call handler; otherwise every request would leave
        # another handler (and its buffer) attached to the root logger.
        logger.removeHandler(ch)
Esempio n. 6
0
 def test_005_insertionserv(self):
     """Annotate sequences with insertions, using the BioSQL server.

     Features listed in an entry's ``diff`` must differ from the reference
     by the length given in ``lengths``; all other features must match the
     reference record fetched from the server.
     """
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     # Release the connection even if an assertion below fails.
     self.addCleanup(server.close)
     seqann = BioSeqAnn(server=server,
                        verbose=False,
                        verbosity=verbosity,
                        pid="004_insertion")
     input_seq = self.data_dir + '/insertion_seqs.fasta'
     # Parse the FASTA file once up front rather than once per entry.
     in_seqs = list(SeqIO.parse(input_seq, "fasta"))
     for ex in self.expected['insertion']:
         locus = ex['locus']
         allele = ex['name']
         _, loc = locus.split("-")
         ann = seqann.annotate(in_seqs[int(ex['index'])], locus)
         self.assertEqual(ann.method, "nt_search")
         self.assertFalse(ann.missing)
         self.assertFalse(ann.blocks)
         self.assertIsInstance(ann, Annotation)
         self.assertTrue(ann.complete_annotation)
         self.assertGreater(len(ann.annotation.keys()), 1)
         db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
         expected = db.lookup(name=allele)
         self.assertEqual(ann.gfe, ex['gfe'])
         self.assertGreater(len(ann.structure), 1)
         for feat in ann.structure:
             self.assertIsInstance(feat, Feature)
         n_diffs = 0
         expected_seqs = get_features(expected)
         self.assertGreater(len(expected_seqs.keys()), 1)
         for feat in expected_seqs:
             self.assertIn(feat, ann.annotation)
             expected_str = str(expected_seqs[feat])
             observed_str = str(ann.annotation[feat].seq)
             if feat in ex['diff']:
                 n_diffs += 1
                 self.assertNotEqual(expected_str, observed_str)
                 # The length change must equal the expected value.
                 diff_len = len(observed_str) - len(expected_str)
                 self.assertEqual(diff_len, ex['lengths'][feat])
             else:
                 self.assertEqual(expected_str, observed_str)
         # Every feature flagged as different must have been seen.
         self.assertEqual(n_diffs, len(ex['diff']))
Esempio n. 7
0
    def test_009_partialambigserv(self):
        """Annotate partial/ambiguous sequences, using the BioSQL server.

        For each expected entry the annotation must be complete, use the
        expected method, omit the features listed in ``missing_feats`` and
        agree with the server's reference record for every feature in
        ``feats`` that is not listed in ``diff``.
        """
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # Release the connection even if an assertion below fails.
        self.addCleanup(server.close)
        seqann = BioSeqAnn(server=server,
                           verbose=False,
                           verbosity=verbosity,
                           pid="006_partialambig")
        input_seq = self.data_dir + '/partial_ambig.fasta'
        # Parse the FASTA file once up front rather than once per entry.
        in_seqs = list(SeqIO.parse(input_seq, "fasta"))

        for ex in self.expected['partial_ambig']:
            i = int(ex['index'])
            locus = ex['locus']
            allele = ex['name']
            _, loc = locus.split("-")
            print(str(i), allele)
            ann = seqann.annotate(in_seqs[i], locus)
            self.assertTrue(ann.complete_annotation)
            self.assertEqual(ann.method, ex['method'])
            self.assertFalse(ann.blocks)
            self.assertIsInstance(ann, Annotation)
            self.assertGreater(len(ann.annotation.keys()), 1)
            db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
            expected = db.lookup(name=allele)
            expected_seqs = get_features(expected)
            self.assertGreater(len(expected_seqs.keys()), 1)
            self.assertEqual(ann.gfe, ex['gfe'])

            self.assertGreater(len(ann.structure), 1)
            for feat in ann.structure:
                self.assertIsInstance(feat, Feature)
            # Make sure only mapped feats exist
            for mf in ex['missing_feats']:
                self.assertNotIn(mf, ann.annotation)

            for feat in ex['feats']:
                expected_str = str(expected_seqs[feat])
                observed_str = str(ann.annotation[feat].seq)
                if feat in ex['diff']:
                    self.assertNotEqual(expected_str, observed_str)
                else:
                    self.assertEqual(expected_str, observed_str)
Esempio n. 8
0
 def test_004_loader3(self):
     """Type a known HLA-A sequence with the lookup tables loaded.

     ``pyGFE`` is built with the gfe2hla, seq2hla and gfe2feat tables
     loaded; the first record of ``known_A.fasta`` is then typed against
     IMGT/HLA 3.31.0 and must come back as the documented allele
     ``HLA-A*01:01:01:01``. The elapsed time is printed.
     """
     start = time.time()
     graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                   bolt=False)
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb, port=3307)
     # Release the connection even if typing or an assertion fails.
     self.addCleanup(server.close)
     seqann = BioSeqAnn(server=server, verbose=True)
     pygfe = pyGFE(graph=graph,
                   seqann=seqann,
                   verbose=False,
                   load_features=False,
                   load_gfe2hla=True,
                   load_seq2hla=True,
                   load_gfe2feat=True,
                   loci=["HLA-A"])
     self.assertIsInstance(pygfe, pyGFE)
     seqs = list(SeqIO.parse(self.data_dir + "/known_A.fasta", "fasta"))
     # Only the 3.31.0 typing is checked; the 3.20.0 variant is disabled.
     typing2 = pygfe.type_from_seq("HLA-A", str(seqs[0].seq), "3.31.0")
     time_taken = time.time() - start
     print("TIME TAKEN: " + str(time_taken))
     self.assertEqual(typing2.hla, 'HLA-A*01:01:01:01')
     self.assertEqual(typing2.status, "documented")
     self.assertIsInstance(typing2, Typing)
Esempio n. 9
0
 def test_001_load_features(self):
     """Build ``pyGFE`` with ``load_features=True`` for HLA-A.

     After construction the GFE helper must hold more than one structure
     and more than one feature, and its structures must include ``HLA-A``
     but not the non-existent locus ``HLA-Z``.
     """
     graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                   bolt=False)
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=3307)
     # The connection was previously never closed; release it when the
     # test finishes, whether it passes or fails.
     self.addCleanup(server.close)
     seqann = BioSeqAnn(server=server)
     pygfe = pyGFE(graph=graph,
                   seqann=seqann,
                   verbose=True,
                   load_features=True,
                   verbosity=2,
                   loci=["HLA-A"])
     self.assertIsInstance(pygfe, pyGFE)
     self.assertGreater(len(pygfe.gfe.structures), 1)
     self.assertGreater(len(pygfe.gfe.all_feats), 1)
     self.assertIn('HLA-A', pygfe.gfe.structures)
     self.assertNotIn('HLA-Z', pygfe.gfe.structures)
Esempio n. 10
0
    def test_005_A(self):
        """Type an HLA-DQB1 sequence using lookup tables read from pickles.

        The feature and lookup tables are read from pickle files in the
        current working directory and handed to ``pyGFE``; the second
        record of ``A_fail.fasta`` is then typed against IMGT/HLA 3.20.0
        and the result must be a ``Typing`` instance.
        """
        graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                      bolt=False)
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=3307)
        # The connection was previously never closed; release it when the
        # test finishes, whether it passes or fails.
        self.addCleanup(server.close)
        seqann = BioSeqAnn(server=server, dbversion="3200", verbose=True)

        def _load_pickle(path):
            # NOTE: pickle executes arbitrary code on load; these files
            # must be trusted, locally generated fixtures.
            with open(path, 'rb') as handle:
                return pickle.load(handle)

        gfe_feats = _load_pickle("gfe2feat.pickle")
        feats = _load_pickle("unique_db-feats.pickle")
        cached_feats = _load_pickle("feature-service.pickle")
        gfe2hla = _load_pickle("gfe2hla.pickle")
        seq2hla = _load_pickle("seq2hla.pickle")

        pygfe = pyGFE(graph=graph,
                      seqann=seqann,
                      load_features=False,
                      verbose=True,
                      features=feats,
                      seq2hla=seq2hla,
                      gfe2hla=gfe2hla,
                      gfe_feats=gfe_feats,
                      cached_features=cached_feats,
                      loci=["HLA-DQB1"])
        self.assertIsInstance(pygfe, pyGFE)
        seqs = list(SeqIO.parse(self.data_dir + "/A_fail.fasta", "fasta"))
        typing1 = pygfe.type_from_seq("HLA-DQB1", str(seqs[1].seq), "3.20.0")
        print(typing1)
        # Only the result type is checked; the allele/status checks are
        # currently disabled.
        self.assertIsInstance(typing1, Typing)
Esempio n. 11
0
 def test_000_pygfe(self):
     """Type an unknown HLA-A sequence with everything loaded.

     The second record of ``unknown_A.fasta`` is typed and must be
     reported as a novel sequence whose closest allele is
     ``HLA-A*01:01:01:01``.
     """
     graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                   bolt=False)
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb)
     # The connection was previously never closed; release it when the
     # test finishes, whether it passes or fails.
     self.addCleanup(server.close)
     seqann = BioSeqAnn(server=server, verbose=False)
     # NOTE(review): the annotator built above is immediately replaced by
     # the placeholder string "X", so pyGFE never receives it. This looks
     # like a debugging leftover -- confirm before removing.
     seqann = "X"
     pygfe = pyGFE(graph=graph,
                   seqann=seqann,
                   load_features=False,
                   verbose=False,
                   load_all=True,
                   loci=["HLA-A"])
     self.assertIsInstance(pygfe, pyGFE)
     seqs = list(SeqIO.parse(self.data_dir + "/unknown_A.fasta", "fasta"))
     typing = pygfe.type_from_seq("HLA-A", str(seqs[1].seq))
     self.assertEqual(typing.hla, 'HLA-A*01:01:01:01')
     self.assertEqual(typing.status, "novel")
     self.assertIsInstance(typing, Typing)
Esempio n. 12
0
 def test_021_stringseq(self):
     """Annotate the same sequence given as a ``str`` and as a ``Seq``.

     The first record of ``exact_seqs.fasta`` is passed to ``annotate``
     once as a plain string and once as its ``seq`` attribute; both
     results must be exact matches that agree with the server's reference
     record for the expected allele.
     """
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     # Release the connection even if an assertion below fails.
     self.addCleanup(server.close)
     seqann = BioSeqAnn(server=server,
                        verbose=False,
                        verbosity=verbosity,
                        pid="015_stringseq")
     input_seq = self.data_dir + '/exact_seqs.fasta'
     ex = self.expected['exact'][0]
     locus = ex['locus']
     allele = ex['name']
     _, loc = locus.split("-")
     in_seqrec = list(SeqIO.parse(input_seq, "fasta"))[0]
     in_str = str(in_seqrec.seq)
     in_seq = in_seqrec.seq
     ann_str = seqann.annotate(in_str, locus)
     ann_seq = seqann.annotate(in_seq, locus)
     # The reference record is the same for both annotations, so it is
     # fetched once instead of once per loop iteration.
     db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
     expected = db.lookup(name=allele)
     expected_seqs = get_features(expected)
     for annotation in [ann_str, ann_seq]:
         self.assertTrue(annotation.exact)
         self.assertIsNone(annotation.features)
         self.assertEqual(annotation.method, "match")
         self.assertIsInstance(annotation, Annotation)
         self.assertTrue(annotation.complete_annotation)
         self.assertGreater(len(annotation.annotation.keys()), 1)
         self.assertEqual(annotation.gfe, ex['gfe'])
         self.assertGreater(len(expected_seqs.keys()), 1)
         self.assertGreater(len(annotation.structure), 1)
         for feat in annotation.structure:
             self.assertIsInstance(feat, Feature)
         for feat in expected_seqs:
             self.assertIn(feat, annotation.annotation)
             self.assertEqual(str(expected_seqs[feat]),
                              str(annotation.annotation[feat]))
Esempio n. 13
0
    def test_005_picklefiles(self):
        """Dump the feature and lookup tables to pickle files.

        Writes ``unique_db-feats.pickle``, ``gfe2feat.pickle``,
        ``gfe2hla.pickle`` and ``seq2hla.pickle`` into the current working
        directory. No assertions are made; the method only produces files.
        """
        # NOTE(review): the graph URL is hard-coded to a specific host
        # rather than taken from configuration -- confirm it is still valid.
        graph = Graph("http://ec2-34-207-175-160.compute-1.amazonaws.com:80", user=neo4juser, password=neo4jpass,
                      bolt=False)
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # The connection was previously never closed; release it when the
        # test finishes, whether it passes or fails.
        self.addCleanup(server.close)
        seqann = BioSeqAnn(server=server, verbose=False)

        gfe = GFE()

        def _dump_pickle(obj, path):
            with open(path, 'wb') as handle:
                pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

        # Build an "DB:LOC:RANK:TERM:SEQ" -> accession lookup from the
        # rows returned by the graph query.
        feat_df = pd.DataFrame(graph.data(all_feats()))
        feat_df['ID'] = feat_df.apply(lambda row: ":".join([row['DB'],
                                                            row['LOC'],
                                                            str(row['RANK']),
                                                            row['TERM'],
                                                            row['SEQ']]),
                                      axis=1)
        feats = feat_df[['ID', 'ACCESSION']].set_index('ID').to_dict()['ACCESSION']

        print("Finished loading feats")
        _dump_pickle(feats, "unique_db-feats.pickle")

        gfedb = GfeDB(graph=graph, persist=False, verbose=False)
        act = ACT(gfedb=gfedb, seqann=seqann, load_gfe2hla=True,
                  load_gfe2feat=True,
                  load_seq2hla=True, gfe=gfe)

        print("Finished loading all!!")

        gfe2hla = act.gfe2hla
        seq2hla = act.seq2hla
        gfe2feat = act.gfe_feats

        _dump_pickle(gfe2feat, "gfe2feat.pickle")
        _dump_pickle(gfe2hla, "gfe2hla.pickle")
        _dump_pickle(seq2hla, "seq2hla.pickle")
Esempio n. 14
0
 def test_015_fail(self):
     """Annotating a sequence that cannot be annotated yields a falsy result.

     With a server supplied, the in-memory reference tables ``seqref`` and
     ``hlaref`` are expected to be empty, and annotating the first record
     of ``failed_seqs.fasta`` without a locus must return a falsy value.
     """
     input_seq = self.data_dir + '/failed_seqs.fasta'
     in_seq = list(SeqIO.parse(input_seq, "fasta"))[0]
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     # Release the connection even if an assertion below fails.
     self.addCleanup(server.close)
     seqann = BioSeqAnn(server=server,
                        verbose=False,
                        verbosity=verbosity,
                        pid="011_fail")
     self.assertFalse(seqann.refdata.seqref)
     self.assertFalse(seqann.refdata.hlaref)
     annotation = seqann.annotate(in_seq)
     self.assertFalse(annotation)
Esempio n. 15
0
 def test_013_nomatch(self):
     """A sequence with no exact match must still be fully annotated.

     The first record of ``nomatch_seqs.fasta`` is annotated as ``HLA-A``;
     the result must be an ``Annotation`` with more than one feature and
     ``complete_annotation`` set.
     """
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     # Release the connection even if an assertion below fails.
     self.addCleanup(server.close)
     seqann = BioSeqAnn(server=server,
                        verbose=False,
                        verbosity=verbosity,
                        pid="009_nomatch")
     self.assertIsInstance(seqann, BioSeqAnn)
     input_seq = self.data_dir + '/nomatch_seqs.fasta'
     in_seq = list(SeqIO.parse(input_seq, "fasta"))[0]
     annotation = seqann.annotate(in_seq, "HLA-A")
     self.assertIsInstance(annotation, Annotation)
     self.assertGreater(len(annotation.annotation.keys()), 1)
     self.assertTrue(annotation.complete_annotation)
Esempio n. 16
0
 def test_002_noserver(self):
     """Construct ``BioSeqAnn`` without a server and inspect its reference data.

     Without a server the reference data must report ``server_avail`` as
     false and carry non-empty ``seqref`` and ``hlaref`` tables.
     """
     annotator = BioSeqAnn(
         verbose=False,
         verbosity=verbosity,
         pid="002_noserver",
     )
     self.assertIsInstance(annotator, BioSeqAnn)
     self.assertIsInstance(annotator.refdata, ReferenceData)
     # More than ten allele names, and 17 as the HLA-A structure maximum.
     n_names = len(annotator.refdata.hla_names)
     self.assertGreater(n_names, 10)
     hla_a_max = annotator.refdata.structure_max['HLA-A']
     self.assertEqual(hla_a_max, 17)
     self.assertFalse(annotator.refdata.server_avail)
     # Both in-memory reference tables must be populated.
     n_seqref = len(annotator.refdata.seqref)
     self.assertGreater(n_seqref, 0)
     n_hlaref = len(annotator.refdata.hlaref)
     self.assertGreater(n_hlaref, 0)
Esempio n. 17
0
 def test_006_insertion(self):
     """Annotate sequences with insertions, using bundled reference data.

     Features listed in an entry's ``diff`` must differ from the reference
     by the length given in ``lengths``; all other features must match the
     reference record in ``seqann.refdata.hlaref``.
     """
     seqann = BioSeqAnn(verbosity=verbosity, pid="004_insertion")
     input_seq = self.data_dir + '/insertion_seqs.fasta'
     # Parse the FASTA file once up front rather than once per entry.
     in_seqs = list(SeqIO.parse(input_seq, "fasta"))
     for ex in self.expected['insertion']:
         locus = ex['locus']
         allele = ex['name']
         ann = seqann.annotate(in_seqs[int(ex['index'])], locus)
         self.assertEqual(ann.method, "nt_search")
         self.assertFalse(ann.missing)
         self.assertFalse(ann.blocks)
         self.assertIsInstance(ann, Annotation)
         self.assertTrue(ann.complete_annotation)
         self.assertGreater(len(ann.annotation.keys()), 1)
         expected = seqann.refdata.hlaref[allele]
         self.assertEqual(ann.gfe, ex['gfe'])
         self.assertGreater(len(ann.structure), 1)
         for feat in ann.structure:
             self.assertIsInstance(feat, Feature)
         n_diffs = 0
         expected_seqs = get_features(expected)
         self.assertGreater(len(expected_seqs.keys()), 1)
         for feat in expected_seqs:
             self.assertIn(feat, ann.annotation)
             expected_str = str(expected_seqs[feat])
             observed_str = str(ann.annotation[feat].seq)
             if feat in ex['diff']:
                 n_diffs += 1
                 self.assertNotEqual(expected_str, observed_str)
                 # The length change must equal the expected value.
                 diff_len = len(observed_str) - len(expected_str)
                 self.assertEqual(diff_len, ex['lengths'][feat])
             else:
                 self.assertEqual(expected_str, observed_str)
         # Every feature flagged as different must have been seen.
         self.assertEqual(n_diffs, len(ex['diff']))
Esempio n. 18
0
    def test_017_logging(self):
        """A failed annotation must end with the expected ERROR log record.

        The first record of ``failed_seqs.fasta`` is annotated without a
        locus under ``assertLogs``; the annotation must be falsy and the
        last captured record must be an ERROR whose message is
        " Locus could not be determined!".
        """
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # Release the connection even if an assertion below fails.
        self.addCleanup(server.close)

        with self.assertLogs(level='INFO') as cm:
            seqann = BioSeqAnn(server=server, verbose=True)
            input_seq = self.data_dir + '/failed_seqs.fasta'
            in_seq = list(SeqIO.parse(input_seq, "fasta"))[0]
            annotation = seqann.annotate(in_seq)
            self.assertFalse(annotation)

        self.assertGreater(len(cm.output), 1)
        last_record = cm.output[-1]
        # The level name precedes the first ":"; the message text is the
        # second "-"-separated field of the record.
        error = last_record.split(":")[0]
        error_msg = last_record.split("-")[1]
        self.assertEqual(error, "ERROR")
        self.assertEqual(error_msg, " Locus could not be determined!")
Esempio n. 19
0
    def test_010_partialambig(self):
        """Annotate partial/ambiguous sequences with bundled reference data.

        For each expected entry the annotation must be complete, use the
        expected method, omit the features listed in ``missing_feats`` and
        agree with the reference record in ``seqann.refdata.hlaref`` for
        every feature in ``feats`` that is not listed in ``diff``.
        """
        seqann = BioSeqAnn(verbose=False,
                           verbosity=verbosity,
                           pid="006_partialambig")
        input_seq = self.data_dir + '/partial_ambig.fasta'
        # Parse the FASTA file once up front rather than once per entry.
        in_seqs = list(SeqIO.parse(input_seq, "fasta"))
        for ex in self.expected['partial_ambig']:
            locus = ex['locus']
            allele = ex['name']
            ann = seqann.annotate(in_seqs[int(ex['index'])], locus)
            self.assertTrue(ann.complete_annotation)
            self.assertEqual(ann.method, ex['method'])
            self.assertFalse(ann.blocks)
            self.assertIsInstance(ann, Annotation)
            self.assertGreater(len(ann.annotation.keys()), 1)
            expected = seqann.refdata.hlaref[allele]
            expected_seqs = get_features(expected)
            self.assertGreater(len(expected_seqs.keys()), 1)
            self.assertEqual(ann.gfe, ex['gfe'])
            self.assertGreater(len(ann.structure), 1)
            for feat in ann.structure:
                self.assertIsInstance(feat, Feature)
            # Make sure only mapped feats exist
            for mf in ex['missing_feats']:
                self.assertNotIn(mf, ann.annotation)

            for feat in ex['feats']:
                expected_str = str(expected_seqs[feat])
                observed_str = str(ann.annotation[feat].seq)
                if feat in ex['diff']:
                    self.assertNotEqual(expected_str, observed_str)
                else:
                    self.assertEqual(expected_str, observed_str)
Esempio n. 20
0
    def test_018_nogfe(self):
        """A non-DNA sequence is annotated without a GFE and logs a warning.

        The second record of ``failed_seqs.fasta`` is annotated under
        ``assertLogs``; the result must have an annotation but no GFE and
        no structure, and the first captured record must be a WARNING
        whose message is " Sequence alphabet contains non DNA".
        """
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # Release the connection even if an assertion below fails.
        self.addCleanup(server.close)

        with self.assertLogs(level='INFO') as cm:
            seqann = BioSeqAnn(server=server, verbose=True)
            input_seq = self.data_dir + '/failed_seqs.fasta'
            in_seq = list(SeqIO.parse(input_seq, "fasta"))[1]
            annotation = seqann.annotate(in_seq)
            self.assertFalse(annotation.gfe)
            self.assertFalse(annotation.structure)
            self.assertTrue(annotation.annotation)

        self.assertGreater(len(cm.output), 2)
        first_record = cm.output[0]
        # The level name precedes the first ":"; the message text is the
        # second "-"-separated field of the record.
        error = first_record.split(":")[0]
        error_msg = first_record.split("-")[1]
        self.assertEqual(error, "WARNING")
        self.assertEqual(error_msg, " Sequence alphabet contains non DNA")
Esempio n. 21
0
    def test_001_pygfe(self):
        """Type an unknown HLA-A sequence using tables read from pickles.

        The lookup tables are read from pickle files in the current working
        directory, an aligning ``BioSeqAnn`` is built from the cached
        features, and the second record of ``unknown_A.fasta`` must be
        typed as a novel sequence closest to ``HLA-A*01:01:01:01``.
        """
        graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                      bolt=False)

        def _read_pickle(filename):
            # Open, unpickle and close one fixture file.
            with open(filename, 'rb') as stream:
                return pickle.load(stream)

        # The files are read in the same order as before: gfe2feat first,
        # then the feature tables, then the two HLA lookups.
        gfe_feats = _read_pickle("gfe2feat.pickle")
        feats = _read_pickle("unique_db-feats.pickle")
        cached_feats = _read_pickle("feature-service.pickle")
        gfe2hla = _read_pickle("gfe2hla.pickle")
        seq2hla = _read_pickle("seq2hla.pickle")

        annotator = BioSeqAnn(verbose=False,
                              cached_features=cached_feats,
                              align=True)

        pygfe = pyGFE(graph=graph,
                      seqann=annotator,
                      gfe_feats=gfe_feats,
                      gfe2hla=gfe2hla,
                      seq2hla=seq2hla,
                      features=feats,
                      verbose=False)
        self.assertIsInstance(pygfe, pyGFE)
        fasta_path = self.data_dir + "/unknown_A.fasta"
        records = list(SeqIO.parse(fasta_path, "fasta"))
        typing = pygfe.type_from_seq("HLA-A", str(records[1].seq))
        print(typing)
        self.assertEqual(typing.hla, 'HLA-A*01:01:01:01')
        self.assertEqual(typing.status, "novel")
        self.assertIsInstance(typing, Typing)
Esempio n. 22
0
 def test_001_seqann(self):
     """Construct ``BioSeqAnn`` with a server and inspect its reference data.

     With a server supplied the reference data must report
     ``server_avail`` as true, list more than ten allele names and give
     17 as the HLA-A structure maximum.
     """
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     # Release the connection even if an assertion below fails.
     self.addCleanup(server.close)
     seqann = BioSeqAnn(server=server,
                        verbose=False,
                        verbosity=verbosity,
                        pid="001_seqann")
     self.assertIsInstance(seqann, BioSeqAnn)
     self.assertIsInstance(seqann.refdata, ReferenceData)
     self.assertGreater(len(seqann.refdata.hla_names), 10)
     self.assertEqual(seqann.refdata.structure_max['HLA-A'], 17)
     self.assertTrue(seqann.refdata.server_avail)
def annotate_get(sequence, locus=None, imgthla_version="3.31.0"):  # noqa: E501
    """annotate_get

    Annotate a consensus sequence and report its features and GFE # noqa: E501

    :param sequence: Valid consensus sequence
    :type sequence: str
    :param locus: Valid locus
    :type locus: str
    :param imgthla_version: IMGT/HLA DB Version, either dotted ("3.31.0")
        or in the compact four-character form ("3310")
    :type imgthla_version: str

    :return: The populated Typing, or an ``(Error, 404)`` tuple that carries
        the log lines captured during the call
    :rtype: Typing
    """
    sequence = SeqRecord(seq=Seq(sequence))

    # Expand a compact version such as "3310" into "3.31.0".  A plain
    # substring test is used because "." is a wildcard in a regular
    # expression and would match any non-empty string.
    if "." not in imgthla_version and len(imgthla_version) == 4:
        imgthla_version = ".".join([imgthla_version[0],
                                    imgthla_version[1:3],
                                    imgthla_version[3]])

    db = imgthla_version.replace(".", "")

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Capture everything logged during this call so it can be attached to
    # an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s - %(funcName)s %(lineno)d: - %(message)s'
    )
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        return log_capture_string.getvalue().split("\n")

    try:
        # One BioSeqAnn per database version is kept in the module cache.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True,
                               safemode=True,
                               dbversion=db,
                               verbosity=3)
            seqanns[db] = seqann

        try:
            annotation = seqann.annotate(sequence, locus)
        except Exception as err:
            # Record the cause in the captured log instead of dropping it.
            logger.error("Annotation raised: %s", err)
            return Error("An error occured during the annotation",
                         log=captured_log()), 404

        if not annotation:
            return Error("No annotation could be produced",
                         log=captured_log()), 404

        if not hasattr(annotation, 'structure'):
            return Error("No structure was produced",
                         log=captured_log()), 404

        typing = Typing()
        typing.features = [
            Feature(accession=f.accession,
                    rank=f.rank,
                    term=f.term,
                    sequence=f.sequence)
            for f in annotation.structure
        ]
        typing.gfe = annotation.gfe
        typing.imgtdb_version = imgthla_version
        return typing
    finally:
        # The handler sits on the root logger; without removing it every
        # request would leave one more handler (and buffer) behind.
        logger.removeHandler(ch)
def gfecreate_post(locus,
                   sequence,
                   imgt_version,
                   neo4j_url=neo_dict['neo4j_url'],
                   user=neo_dict['user'],
                   password=neo_dict['password']):  # noqa: E501
    """gfecreate_post

    Create the GFE for a sequence and return it with its features

    :param locus: Valid HLA locus
    :param sequence: Mapping whose ``'sequence'`` entry holds the sequence
    :param imgt_version: db version, dotted ("3.31.0") or compact ("3310")
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :return: dict with the keys ``gfe``, ``feature`` and
        ``annotation_feature``, or an ``(Error, 404)`` tuple that carries
        the log lines captured during the call
    :rtype: Typing
    """
    global gfe_feats
    global gfe2hla
    global seq2hla

    sequence = sequence['sequence']
    # Only the dot-free form of the version is used below, so stripping the
    # dots covers both the dotted and the compact spelling.
    db = imgt_version.replace(".", "")

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Capture everything logged during this call so it can be attached to
    # an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter('%(asctime)s - %(name)-35s - %(levelname)-5s'
                                  ' - %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        return log_capture_string.getvalue().split("\n")

    try:
        # One BioSeqAnn per database version is kept in the module cache.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True,
                               safemode=True,
                               dbversion=db,
                               verbosity=3)
            seqanns[db] = seqann

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First use: let pyGFE load the lookup tables, then keep them
            # in the module globals for later calls.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            typing = pygfe.gfe_create(locus=locus,
                                      sequence=sequence,
                                      imgtdb_version=db)
        except Exception as err:
            # Route the cause through logging so it lands in the captured
            # log returned to the caller.
            logger.error("gfe_create raised: %s", err)
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        if isinstance(typing, Error):
            typing.log = captured_log()
            return typing, 404

        if not typing:
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        structure_feats = [
            Feature(accession=f.accession,
                    rank=f.rank,
                    term=f.term,
                    sequence=f.sequence)
            for f in typing['structure']
        ]
        anno_feats = [
            Feature(accession=f.accession,
                    rank=f.rank,
                    term=f.term,
                    sequence=f.sequence)
            for f in typing['annotation'].structure
        ]
        return {
            'gfe': typing['gfe'],
            'feature': structure_feats,
            'annotation_feature': anno_feats
        }
    finally:
        # The handler sits on the root logger; without removing it every
        # request would leave one more handler (and buffer) behind.
        logger.removeHandler(ch)
Esempio n. 25
0
def releases_locus_get(imgt_releases,
                       locus,
                       neo4j_url=neo_dict['neo4j_url'],
                       user=neo_dict['user'],
                       password=neo_dict['password']):
    """releases_locus_get

    List the database entries for a locus in a given IMGT release

    :param imgt_releases: Valid imgt release version
    :param locus: Valid locus
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :return: the result of ``pyGFE.list_db_by_locus_imgt``, or an
        ``(Error, 404)`` tuple that carries the log lines captured during
        the call
    :rtype: list of available db
    """
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Capture everything logged during this call so it can be attached to
    # an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        return log_capture_string.getvalue().split("\n")

    try:
        # One BioSeqAnn per database version is kept in the module cache.
        db = imgt_releases.replace(".", "")
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True,
                               safemode=True,
                               dbversion=db,
                               verbosity=3)
            seqanns[db] = seqann

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First use: let pyGFE load the lookup tables, then keep them
            # in the module globals for later calls.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            hla_list = pygfe.list_db_by_locus_imgt(locus, imgt_releases)
        except Exception as err:
            # Route the cause through logging so it lands in the captured
            # log returned to the caller.
            logger.error("The Error %s", err)
            return Error("hla list failed", log=captured_log()), 404

        if isinstance(hla_list, Error):
            hla_list.log = captured_log()
            return hla_list, 404

        if not hla_list:
            return Error("no data record found", log=captured_log()), 404
        return hla_list
    finally:
        # The handler sits on the root logger; without removing it every
        # request would leave one more handler (and buffer) behind.
        logger.removeHandler(ch)
Esempio n. 26
0
    def test_006_align(self):
        # Type four HLA-A sequences through pyGFE with alignment enabled
        # and check that each one resolves to the same allele.

        def load_pickle(path):
            # Read one pickled fixture from the working directory.
            with open(path, 'rb') as handle:
                return pickle.load(handle)

        graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                      bolt=False)
        # NOTE(review): the port is hard-coded here while other tests in
        # this file pass `biosqlport` -- confirm this is intentional.
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=3307)
        try:
            seqann = BioSeqAnn(align=True, server=server, dbversion="3310",
                               verbose=True)

            gfe_feats = load_pickle("gfe2feat.pickle")
            feats = load_pickle("unique_db-feats.pickle")
            cached_feats = load_pickle("feature-service.pickle")
            gfe2hla = load_pickle("gfe2hla.pickle")
            seq2hla = load_pickle("seq2hla.pickle")

            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_features=False,
                          verbose=True,
                          features=feats,
                          seq2hla=seq2hla,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          cached_features=cached_feats,
                          loci=["HLA-A"])
            self.assertIsInstance(pygfe, pyGFE)

            seqs = list(SeqIO.parse(self.data_dir + "/align_tests.fasta",
                                    "fasta"))
            # Run all four typings first, then check the results, so a
            # typing error surfaces before any assertion is evaluated.
            typings = [
                pygfe.type_from_seq("HLA-A", str(seqs[i].seq), "3.31.0")
                for i in range(4)
            ]
            for typing in typings:
                self.assertEqual(typing.hla, 'HLA-A*02:01:01:12')
        finally:
            # Release the BioSQL connection whether or not the test passed.
            server.close()
def findkir_get(gfe,
                neo4j_url=neo_dict['neo4j_url'],
                user=neo_dict['user'],
                password=neo_dict['password']):  # noqa: E501
    """findkir_get

    Get all kir associated with a GFE # noqa: E501

    :param gfe: Valid gfe of locus
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :return: the result of ``pyGFE.find_gfe_kir``, or an ``(Error, 404)``
        tuple that carries the log lines captured during the call
    :rtype: Typing
    """
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Capture everything logged during this call so it can be attached to
    # an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        return log_capture_string.getvalue().split("\n")

    try:
        seqann = BioSeqAnn()

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First use: let pyGFE load the lookup tables, then keep them
            # in the module globals for later calls.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            typing = pygfe.find_gfe_kir(gfe, pygfe.breakup_gfe(gfe))
        except Exception as err:
            # Route the cause through logging so it lands in the captured
            # log returned to the caller.
            logger.error("find_gfe_kir raised: %s", err)
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        if isinstance(typing, Error):
            typing.log = captured_log()
            return typing, 404

        if not typing:
            return Error("Type with alignment failed",
                         log=captured_log()), 404
        return typing
    finally:
        # The handler sits on the root logger; without removing it every
        # request would leave one more handler (and buffer) behind.
        logger.removeHandler(ch)
Esempio n. 28
0
def typeseq_get(sequence,
                locus=None,
                imgthla_version="3.31.0",
                neo4j_url="http://neo4j.b12x.org:80",
                user='******',
                password='******'):  # noqa: E501
    """typeseq_get

    Get HLA and GFE from consensus sequence or GFE notation # noqa: E501

    :param sequence: Consensus sequence
    :type sequence: str
    :param locus: Valid HLA locus
    :type locus: str
    :param imgthla_version: IMGT/HLA DB Version, either dotted ("3.31.0")
        or in the compact four-character form ("3310")
    :type imgthla_version: str
    :param neo4j_url: URL for the neo4j graph
    :type neo4j_url: str
    :param user: Username for the neo4j graph
    :type user: str
    :param password: Password for the neo4j graph
    :type password: str

    :return: the Typing produced by ``pyGFE.type_from_seq``, or an
        ``(Error, 404)`` tuple that carries the log lines captured during
        the call
    :rtype: Typing
    """
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Capture everything logged during this call so it can be attached to
    # an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s - %(funcName)s %(lineno)d: - %(message)s'
    )
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        return log_capture_string.getvalue().split("\n")

    try:
        # Expand a compact version such as "3310" into "3.31.0".  A plain
        # substring test is used because "." is a wildcard in a regular
        # expression and would match any non-empty string.
        if "." not in imgthla_version and len(imgthla_version) == 4:
            imgthla_version = ".".join([imgthla_version[0],
                                        imgthla_version[1:3],
                                        imgthla_version[3]])

        # One BioSeqAnn per database version is kept in the module cache.
        db = imgthla_version.replace(".", "")
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True,
                               safemode=True,
                               dbversion=db,
                               verbosity=3)
            seqanns[db] = seqann

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First use: let pyGFE load the lookup tables, then keep them
            # in the module globals for later calls.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            typing = pygfe.type_from_seq(locus, sequence, imgthla_version)
        except Exception as err:
            # Record the cause in the captured log instead of dropping it.
            logger.error("type_from_seq raised: %s", err)
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        if isinstance(typing, Error):
            typing.log = captured_log()
            return typing, 404

        if not typing:
            return Error("Type sequence failed", log=captured_log()), 404

        typing.gfedb_version = "2.0.0"
        return typing
    finally:
        # The handler sits on the root logger; without removing it every
        # request would leave one more handler (and buffer) behind.
        logger.removeHandler(ch)
def gfeAnnotation_post(sequence, locus, gene=None, imgtdb_version="3.31.0"):
    """gfeAnnotation_post

    Annotate a sequence and report its features and GFE # noqa: E501

    :param sequence: Mapping whose ``'sequence'`` entry holds the sequence
    :param locus: Valid Locus
    :param gene: gene family; "KIR" (any case) builds the annotator with
        ``kir=True``, anything else falls back to the HLA default
    :param imgtdb_version: db version, either dotted ("3.31.0") or in the
        compact four-character form ("3310")
    :return: The populated Typing, or an ``(Error, 404)`` tuple that
        carries the log lines captured during the call
    :rtype: Typing
    """
    sequence = SeqRecord(seq=Seq(sequence['sequence']))

    # Expand a compact version such as "3310" into "3.31.0".  A plain
    # substring test is used because "." is a wildcard in a regular
    # expression and would match any non-empty string.
    if "." not in imgtdb_version and len(imgtdb_version) == 4:
        imgtdb_version = ".".join([imgtdb_version[0],
                                   imgtdb_version[1:3],
                                   imgtdb_version[3]])

    db = imgtdb_version.replace(".", "")

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Capture everything logged during this call so it can be attached to
    # an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        return log_capture_string.getvalue().split("\n")

    try:
        # TODO: Use `gene` or locus to figure out the gene-family
        # NOTE(review): the cache is keyed by db version only, so a KIR
        # and an HLA annotator with the same version would share an entry.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            options = dict(verbose=True, safemode=True,
                           dbversion=db, verbosity=3)
            if gene and gene.upper() == 'KIR':
                options['kir'] = True
            # Any other gene value defaults to HLA; previously it left
            # `seqann` unassigned.
            seqann = BioSeqAnn(**options)
            seqanns[db] = seqann

        try:
            annotation = seqann.annotate(sequence, locus)
        except Exception as err:
            # Route the cause through logging so it lands in the captured
            # log returned to the caller.
            logger.error("Annotation raised: %s", err)
            return Error("An error occurred during the annotation",
                         log=captured_log()), 404

        if not annotation:
            return Error("No annotation could be produced",
                         log=captured_log()), 404

        if not hasattr(annotation, 'structure'):
            return Error("No structure was produced",
                         log=captured_log()), 404

        typing = Typing()
        typing.features = [
            Feature(accession=f.accession, rank=f.rank,
                    term=f.term, sequence=f.sequence)
            for f in annotation.structure
        ]
        typing.gfe = annotation.gfe
        typing.imgtdb_version = imgtdb_version
        return typing
    finally:
        # The handler sits on the root logger; without removing it every
        # request would leave one more handler (and buffer) behind.
        logger.removeHandler(ch)