# Tokens in the last lookup column that are read as a false "hit_order".
# Anything else stays true, which is what the previous bool(str) parsing
# yielded for every non-empty value.
_FALSE_LOOKUP_FLAGS = ("", "0", "false", "no", "none")


def _parseLookupHit(line):
    """Convert one tab-indented lookup line into a hit dictionary.

    Raises:
        ValueError: if the line does not contain six tab-separated fields
            or a numeric field cannot be converted.
    """
    m = re.search("\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)", line)
    if m is None:
        raise ValueError("malformed lookup line: " + repr(line))
    return {
        "method": m.group(1),
        "hit_id": m.group(2),
        "hit_value": float(m.group(3)),
        "hit_from": int(m.group(4)),
        "hit_to": int(m.group(5)),
        # bool("False") is True, so the flag has to be parsed explicitly.
        "hit_order": m.group(6).strip().lower() not in _FALSE_LOOKUP_FLAGS,
    }


def _loadLookupHits(lookupPath, name):
    """Read the precalculated hits stored for `name` in the lookup file.

    A record is a line equal to the name (ignoring surrounding whitespace)
    followed by tab-indented result lines; the first other line ends it.

    Returns:
        A tuple (found, hits): whether the name occurred in the file and
        the list of hit dictionaries parsed from its result lines.
    """
    found, hits = False, []
    wanted = name.strip()
    # The context manager closes the file even when a line fails to parse.
    with open(lookupPath, "r") as handle:
        for line in handle:
            if line.strip() == wanted:
                found = True
            elif found and line.startswith("\t"):
                hits.append(_parseLookupHit(line))
            elif found:
                break
    return found, hits


def predictSequence(args, hpoGraph, uni2hpoDict, name="Sequence", seq=""):
    """Predict HPO terms for one protein sequence and write them to the output.

    Steps: take precalculated hits from args.lookupdb when the name is found
    there, otherwise run the local blast and hhblits searches; optionally
    keep only the first six hits sorted by "hit_value" (args.fast); attach
    the HPO terms of every hit via uni2hpoDict; merge one sub graph per hit
    into a graph seeded with the root; run the predictor; emit one output
    line per accepted node and, if requested, an svg image.

    Args:
        args: parsed options; the attributes read here are lookupdb,
            blastDbFile, blastMinEVal, hhblitsDbFile, fast, neuronalNet,
            minimalConfidence and createSvgImage.
        hpoGraph: graph object providing getRoot() and getHpoSubGraph().
        uni2hpoDict: mapping from a hit's "hit_id" to its HPO terms.
        name: identifier written to the output and used for the lookup and
            the svg file name.
        seq: the sequence to predict.

    Any exception is reported through out.writeError and terminates the
    process with exit code 1.
    """
    try:
        out.writeLog('Predict function for protein: id: "' + str(name) + '" sequence: "' + str(seq) + '"')

        # Use precalculated results if a lookup database was given.
        foundInLookUp, hits = False, []
        if args.lookupdb:
            out.writeLog("Checking for precalculated results!")
            foundInLookUp, hits = _loadLookupHits(args.lookupdb, name)

        # Otherwise search for similar sequences.
        if not foundInLookUp:
            out.writeLog("Check blast and hhblits for sequence orthologs!")
            blastResults = blast.Blast.localBlast(seq=seq, database=args.blastDbFile, minEVal=args.blastMinEVal)
            for hit in blastResults.hits:
                out.writeDebug("Blast: found hit: " + str(hit))
            hhblitsResults = hhblits.HHBLITS.localHHBLITS(seq=str(seq), database=args.hhblitsDbFile)
            for hit in hhblitsResults.hits:
                out.writeDebug("hhblits: found hit: " + str(hit))
            hits.extend(blastResults.hits)
            hits.extend(hhblitsResults.hits)

        # Reduce the number of hits for a fast prediction.
        if args.fast:
            out.writeLog("Reduce hits for faster prediction!")
            hits = sorted(hits, key=lambda t: t["hit_value"])[:6]

        # Attach the HPO terms to each hit. The terms are deliberately not
        # logged: the output could grow to gigabytes.
        out.writeLog("uniprot ids ({}) 2 HPO Terms".format(len(hits)))
        annotatedHits = []
        for hit_id, hit in enumerate(hits):
            try:
                hit.update({"hpoTerms": uni2hpoDict[hit["hit_id"]]})
            except KeyError:
                out.writeWarning("MISSING HPO TERMS FOR HIT: " + str(hit))
            else:
                annotatedHits.append((hit_id, hit))

        # Build and merge the trees. Hits without HPO terms were already
        # reported above and are skipped here instead of aborting the whole
        # prediction with a KeyError; the others keep their original index.
        out.writeLog("Build and merge tree for similar sequences!")
        graph = hpoGraph.getHpoSubGraph(hpoGraph.getRoot())
        for hit_id, hit in annotatedHits:
            graph += hpoGraph.getHpoSubGraph(hit["hpoTerms"], {hit_id: hit})

        # Run the prediction.
        out.writeLog("Run main prediction!")
        p = predictor.Predictor(args.neuronalNet)
        p.runprediction(seq, graph)

        # Always accept the root.
        for root in hpoGraph.getRoot():
            graph.getHpoTermById(root).accepted = 1

        # Write the output. Dividing by 4.0 keeps the score a float even
        # where "/" on two ints would truncate (Python 2).
        out.writeLog("writing output")
        for node in graph.getAcceptedNodes(args.minimalConfidence):
            out.writeOutput("{}\t{}\t{}".format(name, node.id, "%.*f" % (2, (node.accepted + 2) / 4.0)))

        # Optional svg image.
        if args.createSvgImage:
            out.writeLog("Create a svg image showing all results!")
            if graph is not None:
                graph.writeSvgImage(fileName=str(name) + ".svg")
            else:
                out.writeWarning("Can't create a svg image from an empty tree!")

    except Exception as err:
        exc_tb = sys.exc_info()[2]
        out.writeError("Predicting Error: " + str(err) + " on line: " + str(exc_tb.tb_lineno))
        sys.exit(1)
def __init__(self, hpoFile="../data/hp.obo"):
    """Initialise an HPO graph, optionally by parsing an HPO file.

    Args:
        hpoFile: path of the file to parse. If None, no file is read: the
            graph stays empty and isSubTree is set to True.

    The file is read as blocks of non-empty lines. A block starting with
    "[Term]" becomes an HpoTerm stored in hpoTermsDict under the first
    space-separated token of its id, unless one of its lines starts with
    "is_obsolete:". The lines before the first "[Term]" are stored as
    attributes of this object ("name: value"). Afterwards every term's key
    is appended to the `childrens` list of each parent named in its `is_a`.

    Errors raised while reading or parsing are reported via out.writeError
    and not re-raised; the terms parsed so far are kept.
    """
    if hpoFile is not None:
        out.writeDebug("parsing hpo file " + str(hpoFile))

    # Main class variables.
    self.hpoTermsDict = {}
    self.isSubTree = hpoFile is None

    # Without a file to parse, an empty graph is returned.
    if hpoFile is None:
        return

    def _analyseLines(self, lines):
        """Analyse one collected block of lines (helper function)."""
        # Nothing collected yet, e.g. an empty file or a file that starts
        # directly with "[Term]"; indexing lines[0] would fail.
        if not lines:
            return

        if lines[0].startswith("[Term]"):
            # Obsolete terms are not added to the graph.
            if any(line.startswith("is_obsolete:") for line in lines):
                return
            term = HpoTerm(lines[1:])
            self.hpoTermsDict[term.id.split(" ")[0]] = term
            return

        # File header: store every "name: value" line as an attribute.
        for line in lines:
            colon = line.find(":")
            attrName = line[:colon].strip()
            attrVal = line[colon + 1:].strip()
            if not hasattr(self, attrName):
                setattr(self, attrName, attrVal)
            elif isinstance(getattr(self, attrName), list):
                getattr(self, attrName).append(attrVal)
            else:
                # Second occurrence: turn the single value into a list.
                setattr(self, attrName, [getattr(self, attrName), attrVal])

    # Parse the file block by block.
    try:
        # open() replaces the Python 2 only file() builtin; the context
        # manager closes the file even if parsing raises.
        with open(hpoFile, "r") as f:
            lines = []
            for line in f:
                # Skip empty lines.
                if line.strip() == "":
                    continue
                if line.startswith("[Term]"):
                    # A new term starts: flush the block collected so far.
                    _analyseLines(self, lines)
                    lines = [line]
                else:
                    lines.append(line)
            _analyseLines(self, lines)
    except Exception as e:
        # str(e) instead of e.message, which does not exist on Python 3.
        out.writeError("Error parsing hpo file " + str(e) + " " + str(e.args))

    # Register every term as a child of each of its parents.
    for key in self.hpoTermsDict:
        node = self.hpoTermsDict[key]
        if not hasattr(node, "is_a"):
            continue
        parents = node.is_a if isinstance(node.is_a, list) else [node.is_a]
        for parent in parents:
            self.hpoTermsDict[parent.split(" ")[0]].childrens.append(key)
exc_type, exc_obj, exc_tb = sys.exc_info() out.writeError("Predicting Error: " + str(err) + " on line: " + str(exc_tb.tb_lineno)) exit(1) pass # printheader output out.writeOutput("AUTHOR TEAM_NAME") out.writeOutput("MODEL\t1") out.writeOutput("KEYWORDS clinical data, synteny.") # ok, do the whole thing if args.sequence != None: predictSequence(args, hpoGraph, uni2hpoDict, seq=args.sequence) elif os.path.isfile(args.fastaFile): f = open(args.fastaFile, "rU") for record in SeqIO.parse(f, "fasta"): predictSequence(args, hpoGraph, uni2hpoDict, name=record.id, seq=str(record.seq)) f.close() else: out.writeError("Error: no sequence to predict given! (wrong path?)") out.writeOutput("END") # quit without error code exit(0) except Exception as err: # main routine exception handler out.writeError("Unexpected Error: " + str(err)) exit(1)