def releases_locus_get(imgt_release_version, locus,
                       neo4j_url=neo_dict['neo4j_url'],
                       user=neo_dict['user'],
                       password=neo_dict['password']):
    """releases_locus_get

    Get Alleles and their corresponding GFE By Locus and IMGT version

    NOTE(review): another function with this same name (taking
    ``imgt_releases``) is visible later in this source; if both live in
    one module the later definition replaces this one - confirm.

    :param imgt_release_version: Valid imgt release version number;
        dots are stripped before it is handed to ``get_hla_gfe_by``
    :param locus: Valid locus name
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :rtype: list of Alleles and their corresponding GFE on success,
        otherwise an ``(Error, 404)`` tuple whose ``log`` holds the
        messages captured while handling the request
    """
    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-request handler writing into an in-memory buffer so the log
    # can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        try:
            graph = Graph(neo4j_url, user=user, password=password,
                          bolt=False)
        except ServiceUnavailable as err:
            log_data = log_capture_string.getvalue().split("\n")
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        imgt_version = imgt_release_version.replace(".", "")
        try:
            hla_list = get_hla_gfe_by(graph, locus, imgt_version)
        except Exception as e:
            # Log through the logger (not print) so the cause ends up
            # in the captured log returned to the caller.
            logger.error("hla list failed: %s", e)
            return Error("hla list failed",
                         log=log_capture_string.getvalue().split("\n")), 404

        if isinstance(hla_list, Error):
            hla_list.log = log_capture_string.getvalue().split("\n")
            return hla_list, 404

        if not hla_list:
            return Error("no data record found",
                         log=log_capture_string.getvalue().split("\n")), 404

        return hla_list
    finally:
        # Detach the per-request handler; otherwise every call leaves
        # one more handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def gfeNotation_post(sequence, locus, gene):
    """gfeNotation_post

    GFE notations associated with the sequence

    :param sequence: mapping whose ``'sequence'`` entry holds the
        sequence to annotate
    :param locus: Valid HLA locus (passed to ``GFE.get_gfe``)
    :param gene: Kir true or false; a truthy value selects a
        ``BioSeqAnn(kir=True)`` annotator
    :rtype: dict with keys ``'gfe'`` and ``'feature'`` on success,
        otherwise an ``(Error, 404)`` tuple
    """
    kir = gene
    sequence = SeqRecord(seq=Seq(sequence['sequence']))

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-request handler writing into an in-memory buffer so the log
    # can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        gfe = GFE()
        seqann = BioSeqAnn(kir=True) if kir else BioSeqAnn()

        try:
            annotation = seqann.annotate(sequence)
        except Exception as e:
            # Route the cause through the logger so it is part of the
            # captured log returned to the caller.
            logger.error("annotation failed: %s", e)
            return Error("An error occured during the annotation",
                         log=log_capture_string.getvalue().split("\n")), 404

        try:
            res_feature, res_gfe = gfe.get_gfe(annotation, locus)
        except Exception as e:
            logger.error("get_gfe failed: %s", e)
            return Error("An error occurred in getting the gfe of annotation",
                         log=log_capture_string.getvalue().split("\n")), 404

        feats = [
            Feature(accession=f.accession, rank=f.rank,
                    term=f.term, sequence=f.sequence)
            for f in res_feature
        ]
        return {'gfe': res_gfe, 'feature': feats}
    finally:
        # Detach the per-request handler; otherwise every call leaves
        # one more handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def allreleases_get(neo4j_url=neo_dict['neo4j_url'],
                    user=neo_dict['user'],
                    password=neo_dict['password']):
    """allreleases_get

    Get all db releases

    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :rtype: list of available db on success, otherwise an
        ``(Error, 404)`` tuple whose ``log`` holds the captured messages
    """
    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-request handler writing into an in-memory buffer so the log
    # can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        try:
            graph = Graph(neo4j_url, user=user, password=password,
                          bolt=False)
        except ServiceUnavailable as err:
            log_data = log_capture_string.getvalue().split("\n")
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        try:
            db_releases = list_all_db_releases(graph)
        except Exception as e:
            # Log through the logger (not print) so the cause ends up
            # in the captured log returned to the caller.
            logger.error("Server Error getting IMGT versions: %s", e)
            return Error("Server Error getting IMGT versions.",
                         log=log_capture_string.getvalue().split("\n")), 404

        if isinstance(db_releases, Error):
            db_releases.log = log_capture_string.getvalue().split("\n")
            return db_releases, 404

        if not db_releases:
            return Error("no data record found",
                         log=log_capture_string.getvalue().split("\n")), 404

        return db_releases
    finally:
        # Detach the per-request handler; otherwise every call leaves
        # one more handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def locusfeature_get(locus):
    """locusfeature_get

    Get all features associated with a locus

    :param locus: Valid HLA locus
    :rtype: list of features (whatever ``GFE.locus_features`` returns)
        on success, otherwise an ``(Error, 404)`` tuple
    """
    gfe = GFE()

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-request handler writing into an in-memory buffer so the log
    # can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        try:
            feats = gfe.locus_features(locus)
        except Exception as e:
            # Log through the logger (not print) so the cause ends up
            # in the captured log returned to the caller.
            logger.error("locus_features failed: %s", e)
            return Error("failed to load the feature of given locus",
                         log=log_capture_string.getvalue().split("\n")), 404

        if isinstance(feats, Error):
            feats.log = log_capture_string.getvalue().split("\n")
            return feats, 404

        if not feats:
            return Error("no feature associated with the given locus",
                         log=log_capture_string.getvalue().split("\n")), 404

        return feats
    finally:
        # Detach the per-request handler; otherwise every call leaves
        # one more handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def gfecreate_post(locus, sequence, imgt_version,
                   neo4j_url=neo_dict['neo4j_url'],
                   user=neo_dict['user'],
                   password=neo_dict['password']):
    """gfecreate_post

    Create the GFE notation for a sequence at a locus.

    :param locus: Valid HLA locus
    :param sequence: mapping whose ``'sequence'`` entry holds the
        sequence
    :param imgt_version: db version, with or without dots
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :rtype: dict with keys ``'gfe'``, ``'feature'`` and
        ``'annotation_feature'`` on success, otherwise an
        ``(Error, 404)`` tuple
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla

    sequence = sequence['sequence']

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-request handler writing into an in-memory buffer so the log
    # can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s'
        ' - %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        # Only the dot-free form is used below, so no "3310" -> "3.31.0"
        # reformatting is needed here (the previous attempt relied on
        # re.match(".") whose unescaped dot matched any character).
        db = imgt_version.replace(".", "")

        # Reuse one annotator per db version across requests.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True, safemode=True,
                               dbversion=db, verbosity=3)
            seqanns.update({db: seqann})

        try:
            graph = Graph(neo4j_url, user=user, password=password,
                          bolt=False)
        except ServiceUnavailable as err:
            log_data = log_capture_string.getvalue().split("\n")
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First use: let pyGFE load the lookup tables, then keep
            # them in module globals for later requests.
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          load_gfe2hla=True, load_seq2hla=True,
                          load_gfe2feat=True, verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          gfe2hla=gfe2hla, gfe_feats=gfe_feats,
                          seq2hla=seq2hla, verbose=True)

        try:
            typing = pygfe.gfe_create(locus=locus, sequence=sequence,
                                      imgtdb_version=db)
        except Exception as e:
            # Log through the logger (not print) so the cause ends up
            # in the captured log returned to the caller.
            logger.error("gfe_create failed: %s", e)
            return Error("Type with alignment failed",
                         log=log_capture_string.getvalue().split("\n")), 404

        if isinstance(typing, Error):
            typing.log = log_capture_string.getvalue().split("\n")
            return typing, 404

        if not typing:
            return Error("Type with alignment failed",
                         log=log_capture_string.getvalue().split("\n")), 404

        structure_feats = [
            Feature(accession=f.accession, rank=f.rank,
                    term=f.term, sequence=f.sequence)
            for f in typing['structure']
        ]
        anno_feats = [
            Feature(accession=f.accession, rank=f.rank,
                    term=f.term, sequence=f.sequence)
            for f in typing['annotation'].structure
        ]
        return {
            'gfe': typing['gfe'],
            'feature': structure_feats,
            'annotation_feature': anno_feats
        }
    finally:
        # Detach the per-request handler; otherwise every call leaves
        # one more handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def type_from_seq(self, locus: str = None, sequence: str = None,
                  imgtdb_version: str = "3.31.0", nseqs: int = 20,
                  alignseqs: int = 10, skip: List = None):
    """
    creates GFE from HLA sequence and locus

    If ``sequence_lookup`` already knows the sequence the result is
    marked ``"documented"``; otherwise a GFE is created and the result
    is marked ``"novel"`` (new features) or ``"novel_combination"``.

    :param locus: string containing HLA locus. When empty it is
        predicted from the sequence with ``get_locus``.
    :param sequence: string containing sequence data (a ``Seq`` or
        ``SeqRecord`` is converted to ``str``).
    :param imgtdb_version: IMGT db version in dotted form.
    :param nseqs: not used in this method.
    :param alignseqs: not used in this method.
    :param skip: not used in this method.
    :return: GFEobject (``Typing``), or an ``Error`` when the created
        GFE carries no ``'annotation'`` entry.
    :raises Exception: if the sequence fails ``checkseq`` or the locus
        cannot be determined.
    """
    # TODO: Add full gene accession
    # TODO: reformat dbversion if missing .
    ac_object = Typing()
    ac_object.imgtdb_version = "".join(imgtdb_version.split("."))
    ac_object.pygfe_version = pygfe.__version__
    ac_object.seqann_version = seqann.__version__
    ac_object.gfedb_version = '0.0.2'
    db = ac_object.imgtdb_version

    # If sequence is a biopython Seq / SeqRecord
    # convert it to a plain string
    if isinstance(sequence, Seq):
        sequence = str(sequence)
    elif isinstance(sequence, SeqRecord):
        sequence = str(sequence.seq)

    # Lazily create one annotator per db version.
    if db not in self.seqann:
        self.seqann.update({
            db: BioSeqAnn(dbversion=db,
                          load_features=self.load_features,
                          cached_features=self.cached_features)
        })

    # If sequence contains any characters
    # other than ATCG then the GFE notation
    # can not be created
    valid_seq = checkseq(sequence)
    if not valid_seq:
        # Logging is optional, rejecting the input is not: previously
        # the raise only happened when verbose was enabled.
        if self.verbose:
            self.logger.warning(self.logname + " Sequence alphabet "
                                + "contains non DNA")
            self.logger.warning(self.logname
                                + " No GFE string will be generated")
        raise Exception(
            "Input sequence was not valid! {}".format(sequence))

    # Check if the locus exists
    if not locus:
        if self.verbose:
            self.logger.info(self.logname + " No locus provided! ")

        # Guessing locus with blastn
        locus = get_locus(sequence, kir=self.kir,
                          refdata=self.seqann[db].refdata)

        if locus and self.verbose:
            self.logger.info(self.logname + " Locus prediction = " + locus)

        if not locus:
            if self.verbose:
                self.logger.error(self.logname
                                  + " Locus could not be determined!")
            raise Exception(
                "Locus could not be determined! {}".format(sequence))

    sequence = sequence.upper()
    sequence_typing = self.sequence_lookup(locus, sequence, db)
    if sequence_typing:
        ac_object.status = "documented"
        ac_object.hla = sequence_typing[0]
        ac_object.gfe = sequence_typing[1]
        ac_object.closest_gfe = sequence_typing[1]
        ac_object.features = sequence_typing[2]
        if self.verbose:
            self.logger.info(self.logname + locus
                             + " sequence documented for "
                             + imgtdb_version + " | "
                             + ac_object.gfe + " = " + ac_object.hla)
        return ac_object

    # time GFE creation
    time_start = time.time()
    gfe_o = self.gfe_create(locus, sequence, db)

    if 'annotation' not in gfe_o:
        self.logger.error(self.logname + "Failed to create annotation!!")
        error = Error("Failed to create annotation!!",
                      ac_object.pygfe_version,
                      ac_object.gfedb_version,
                      imgtdb_version)
        return error

    if self.verbose:
        # time.time() differences are seconds (the message used to
        # label them as minutes).
        time_taken = int(time.time() - time_start)
        self.logger.info(self.logname + " gfe_create time for "
                         + locus + " " + imgtdb_version + " = "
                         + str(time_taken) + " seconds")

    annotation = gfe_o['annotation']
    ac_object.gfe = gfe_o['gfe']
    ac_object.features = [
        Feature(accession=f.accession, rank=f.rank,
                sequence=f.sequence, term=f.term)
        for f in gfe_o['structure']
    ]

    novel_features = self.unique_features(ac_object.features, locus, db)
    if len(novel_features) != 0:
        if self.verbose:
            self.logger.info(self.logname + " # novel features = "
                             + str(len(novel_features)))
        ac_object.novel_features = novel_features
        ac_object.status = "novel"
    else:
        self.logger.info(self.logname + " novel combination")
        ac_object.status = "novel_combination"

    similar_results = self.find_similar(ac_object.gfe,
                                        ac_object.features,
                                        imgtdb_version)
    if similar_results:
        ac_object.hla = similar_results[0]
        ac_object.closest_gfe = similar_results[1]
        if self.seqann[db].align:
            if self.verbose:
                self.logger.info(self.logname
                                 + " finding sequence differences")
            ac_object.seqdiff = self.diff_seq(similar_results[0],
                                              annotation,
                                              imgtdb_version)
            ac_object.differences = len(ac_object.seqdiff)
    else:
        ac_object.hla = "NA"
        ac_object.closest_gfe = "NA"
        if self.verbose:
            # Logger.warn is a deprecated alias of warning().
            self.logger.warning(self.logname + " No allele call made!")

    return ac_object
def releases_locus_get(imgt_releases, locus,
                       neo4j_url=neo_dict['neo4j_url'],
                       user=neo_dict['user'],
                       password=neo_dict['password']):
    """releases_locus_get

    List database entries for a locus and IMGT release via
    ``pyGFE.list_db_by_locus_imgt``.

    NOTE(review): another function with this same name (taking
    ``imgt_release_version``) is visible earlier in this source; if both
    live in one module this later definition replaces it - confirm.

    :param imgt_releases: Valid imgt releases version
    :param locus: Valid locus name
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :rtype: the result of ``list_db_by_locus_imgt`` on success,
        otherwise an ``(Error, 404)`` tuple
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-request handler writing into an in-memory buffer so the log
    # can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        db = imgt_releases.replace(".", "")

        # Reuse one annotator per db version across requests.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True, safemode=True,
                               dbversion=db, verbosity=3)
            seqanns.update({db: seqann})

        try:
            graph = Graph(neo4j_url, user=user, password=password,
                          bolt=False)
        except ServiceUnavailable as err:
            log_data = log_capture_string.getvalue().split("\n")
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First use: let pyGFE load the lookup tables, then keep
            # them in module globals for later requests.
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          load_gfe2hla=True, load_seq2hla=True,
                          load_gfe2feat=True, verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          gfe2hla=gfe2hla, gfe_feats=gfe_feats,
                          seq2hla=seq2hla, verbose=True)

        try:
            hla_list = pygfe.list_db_by_locus_imgt(locus, imgt_releases)
        except Exception as e:
            # Log through the logger (not print) so the cause ends up
            # in the captured log returned to the caller.
            logger.error("hla list failed: %s", e)
            return Error("hla list failed",
                         log=log_capture_string.getvalue().split("\n")), 404

        if isinstance(hla_list, Error):
            hla_list.log = log_capture_string.getvalue().split("\n")
            return hla_list, 404

        if not hla_list:
            return Error("no data record found",
                         log=log_capture_string.getvalue().split("\n")), 404

        return hla_list
    finally:
        # Detach the per-request handler; otherwise every call leaves
        # one more handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def gfeAnnotation_post(sequence, locus, gene=None, imgtdb_version="3.31.0"):
    """gfeAnnotation_post

    Annotate a sequence and return its features and GFE notation.

    :param sequence: mapping whose ``'sequence'`` entry holds the
        sequence
    :param locus: Valid Locus
    :param gene: gene family; ``'KIR'`` (any case) selects a KIR
        annotator, anything else (including ``None``) defaults to HLA
    :param imgtdb_version: db version, dotted (``"3.31.0"``) or
        compact (``"3310"``)
    :rtype: Typing on success, otherwise an ``(Error, 404)`` tuple
    """
    global seqanns

    typing = Typing()
    sequence = SeqRecord(seq=Seq(sequence['sequence']))

    # Expand a compact version such as "3310" to "3.31.0". The earlier
    # re.match(".") test used an unescaped dot and so never detected
    # the compact form.
    if "." not in imgtdb_version and len(imgtdb_version) >= 4:
        imgtdb_version = ".".join([imgtdb_version[0],
                                   imgtdb_version[1:3],
                                   imgtdb_version[3:]])
    db = imgtdb_version.replace(".", "")

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-request handler writing into an in-memory buffer so the log
    # can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        # TODO: Use `gene` or locus to figure out the gene-family
        # NOTE(review): the cache is keyed by db version only, so a KIR
        # annotator cached for a version is reused for later HLA
        # requests of that version (and vice versa) - confirm intent.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            # Always bind seqann: previously some gene values left it
            # undefined and the annotate call below raised a NameError.
            if gene and gene.upper() == 'KIR':
                seqann = BioSeqAnn(verbose=True, safemode=True,
                                   dbversion=db, verbosity=3, kir=True)
            else:
                # Defaults to HLA
                seqann = BioSeqAnn(verbose=True, safemode=True,
                                   dbversion=db, verbosity=3)
            seqanns.update({db: seqann})

        try:
            annotation = seqann.annotate(sequence, locus)
        except Exception as e:
            # Log through the logger (not print) so the cause ends up
            # in the captured log returned to the caller.
            logger.error("annotation failed: %s", e)
            return Error("An error occurred during the annotation",
                         log=log_capture_string.getvalue().split("\n")), 404

        if not annotation:
            return Error("No annotation could be produced",
                         log=log_capture_string.getvalue().split("\n")), 404

        if not hasattr(annotation, 'structure'):
            return Error("No structure was produced",
                         log=log_capture_string.getvalue().split("\n")), 404

        typing.features = [
            Feature(accession=f.accession, rank=f.rank,
                    term=f.term, sequence=f.sequence)
            for f in annotation.structure
        ]
        typing.gfe = annotation.gfe
        typing.imgtdb_version = imgtdb_version
        return typing
    finally:
        # Detach the per-request handler; otherwise every call leaves
        # one more handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def annotate_get(sequence, locus=None, imgthla_version="3.31.0"):
    """annotate_get

    Annotate a consensus sequence and return its features and GFE
    notation.

    :param sequence: Valid consensus sequence
    :type sequence: str
    :param locus: Valid locus
    :type locus: str
    :param imgthla_version: IMGT/HLA DB Version, dotted (``"3.31.0"``)
        or compact (``"3310"``)
    :type imgthla_version: str

    :rtype: Typing on success, otherwise an ``(Error, 404)`` tuple
    """
    global seqanns

    typing = Typing()
    sequence = SeqRecord(seq=Seq(sequence))

    # Expand a compact version such as "3310" to "3.31.0". The earlier
    # re.match(".") test used an unescaped dot and so never detected
    # the compact form.
    if "." not in imgthla_version and len(imgthla_version) >= 4:
        imgthla_version = ".".join([imgthla_version[0],
                                    imgthla_version[1:3],
                                    imgthla_version[3:]])
    db = imgthla_version.replace(".", "")

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-request handler writing into an in-memory buffer so the log
    # can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s - %(funcName)s %(lineno)d: - %(message)s'
    )
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        # Reuse one annotator per db version across requests.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True, safemode=True,
                               dbversion=db, verbosity=3)
            seqanns.update({db: seqann})

        try:
            annotation = seqann.annotate(sequence, locus)
        except Exception as e:
            # ``except Exception`` (not bare except) so that
            # KeyboardInterrupt / SystemExit still propagate; the cause
            # is logged so it shows up in the returned log.
            logger.error("annotation failed: %s", e)
            return Error("An error occured during the annotation",
                         log=log_capture_string.getvalue().split("\n")), 404

        if not annotation:
            return Error("No annotation could be produced",
                         log=log_capture_string.getvalue().split("\n")), 404

        if not hasattr(annotation, 'structure'):
            return Error("No structure was produced",
                         log=log_capture_string.getvalue().split("\n")), 404

        typing.features = [
            Feature(accession=f.accession, rank=f.rank,
                    term=f.term, sequence=f.sequence)
            for f in annotation.structure
        ]
        typing.gfe = annotation.gfe
        typing.imgtdb_version = imgthla_version
        return typing
    finally:
        # Detach the per-request handler; otherwise every call leaves
        # one more handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def typeseq_get(sequence, locus=None, imgthla_version="3.31.0",
                neo4j_url="http://neo4j.b12x.org:80",
                user='******', password='******'):
    """typeseq_get

    Get HLA and GFE from consensus sequence or GFE notation

    :param sequence: Consensus sequence
    :type sequence: str
    :param locus: Valid HLA locus
    :type locus: str
    :param imgthla_version: IMGT/HLA DB Version, dotted (``"3.31.0"``)
        or compact (``"3310"``)
    :type imgthla_version: str
    :param neo4j_url: URL for the neo4j graph
    :type neo4j_url: str
    :param user: Username for the neo4j graph
    :type user: str
    :param password: Password for the neo4j graph
    :type password: str

    :rtype: Typing on success, otherwise an ``(Error, 404)`` tuple
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-request handler writing into an in-memory buffer so the log
    # can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s - %(funcName)s %(lineno)d: - %(message)s'
    )
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        # Expand a compact version such as "3310" to "3.31.0". The
        # earlier re.match(".") test used an unescaped dot and so never
        # detected the compact form.
        if "." not in imgthla_version and len(imgthla_version) >= 4:
            imgthla_version = ".".join([imgthla_version[0],
                                        imgthla_version[1:3],
                                        imgthla_version[3:]])
        db = imgthla_version.replace(".", "")

        # Reuse one annotator per db version across requests.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True, safemode=True,
                               dbversion=db, verbosity=3)
            seqanns.update({db: seqann})

        try:
            graph = Graph(neo4j_url, user=user, password=password,
                          bolt=False)
        except ServiceUnavailable as err:
            log_data = log_capture_string.getvalue().split("\n")
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First use: let pyGFE load the lookup tables, then keep
            # them in module globals for later requests.
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          load_gfe2hla=True, load_seq2hla=True,
                          load_gfe2feat=True, verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          gfe2hla=gfe2hla, gfe_feats=gfe_feats,
                          seq2hla=seq2hla, verbose=True)

        try:
            typing = pygfe.type_from_seq(locus, sequence, imgthla_version)
        except Exception as e:
            # ``except Exception`` (not bare except) so that
            # KeyboardInterrupt / SystemExit still propagate; the cause
            # is logged so it shows up in the returned log.
            logger.error("type_from_seq failed: %s", e)
            return Error("Type with alignment failed",
                         log=log_capture_string.getvalue().split("\n")), 404

        if isinstance(typing, Error):
            typing.log = log_capture_string.getvalue().split("\n")
            return typing, 404

        if not typing:
            return Error("Type sequence failed",
                         log=log_capture_string.getvalue().split("\n")), 404

        typing.gfedb_version = "2.0.0"
        return typing
    finally:
        # Detach the per-request handler; otherwise every call leaves
        # one more handler (and its buffer) on the root logger.
        logger.removeHandler(ch)
def findkir_get(gfe, neo4j_url=neo_dict['neo4j_url'],
                user=neo_dict['user'],
                password=neo_dict['password']):
    """findkir_get

    Get all kir associated with a GFE

    :param gfe: Valid gfe of locus
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :rtype: Typing (the result of ``pyGFE.find_gfe_kir``) on success,
        otherwise an ``(Error, 404)`` tuple
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-request handler writing into an in-memory buffer so the log
    # can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        seqann = BioSeqAnn()

        try:
            graph = Graph(neo4j_url, user=user, password=password,
                          bolt=False)
        except ServiceUnavailable as err:
            log_data = log_capture_string.getvalue().split("\n")
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First use: let pyGFE load the lookup tables, then keep
            # them in module globals for later requests.
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          load_gfe2hla=True, load_seq2hla=True,
                          load_gfe2feat=True, verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph, seqann=seqann,
                          gfe2hla=gfe2hla, gfe_feats=gfe_feats,
                          seq2hla=seq2hla, verbose=True)

        try:
            typing = pygfe.find_gfe_kir(gfe, pygfe.breakup_gfe(gfe))
        except Exception as e:
            # Log through the logger (not print) so the cause ends up
            # in the captured log returned to the caller.
            logger.error("find_gfe_kir failed: %s", e)
            return Error("Type with alignment failed",
                         log=log_capture_string.getvalue().split("\n")), 404

        if isinstance(typing, Error):
            typing.log = log_capture_string.getvalue().split("\n")
            return typing, 404

        if not typing:
            return Error("Type with alignment failed",
                         log=log_capture_string.getvalue().split("\n")), 404

        return typing
    finally:
        # Detach the per-request handler; otherwise every call leaves
        # one more handler (and its buffer) on the root logger.
        logger.removeHandler(ch)