def getRoot(self, multiRootLog=True):
    """Return the root term(s) of this graph.

    Every key of ``self.hpoTermsDict`` for which ``self.getParents(key)``
    yields nothing is treated as a root (presumably: the term has no is_a
    parent, or its parents are not part of this graph -- that decision is
    made by ``getParents``, which is not shown here).

    Args:
        multiRootLog: when true, a warning is written through
            ``out.writeWarning`` if the number of roots found is not
            exactly one.

    Returns:
        A list of the parentless keys, in the iteration order of
        ``self.hpoTermsDict``. The list may be empty or hold several keys.
    """
    # Truthiness instead of ``== []`` so that any empty result from
    # getParents (empty list, tuple, set or None) marks the term as a root.
    roots = [key for key in self.hpoTermsDict if not self.getParents(key)]

    # A well-formed graph is expected to have exactly one root.
    if multiRootLog and len(roots) != 1:
        out.writeWarning("WARNING: found unexpected multiple (or none) roots in graph!")

    return roots
def _parseLookupFlag(text):
    """Turn a textual boolean field of the lookup file into a bool.

    ``bool(text)`` would be True for every non-empty string (including
    "False"), so the text is compared explicitly. Unknown tokens stay
    truthy, which keeps the previous result for anything that is not an
    obvious "false" spelling.
    """
    return text.strip().lower() not in ("", "false", "0", "none")


def _loadLookupHits(lookupPath, name):
    """Read the precalculated hits stored for ``name`` in a lookup file.

    The file is scanned for a line whose stripped content equals the
    stripped ``name``; the tab-indented lines that follow are parsed as
    hits (six tab-separated fields each) until the first line that is
    neither the name nor tab-indented.

    Returns:
        ``(found, hits)`` -- whether the name was present, and the list of
        hit dicts with the keys method, hit_id, hit_value, hit_from, hit_to
        and hit_order.

    Raises:
        ValueError: a tab-indented line of the entry has fewer than six
            fields, or a numeric field cannot be converted.
    """
    fieldPattern = re.compile("\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)")
    wanted = str(name).strip()
    found, hits = False, []

    # "with" guarantees the handle is closed even if a line fails to parse.
    with open(lookupPath, "r") as handle:
        for line in handle:
            if line.strip() == wanted:
                found = True
            elif found and line.startswith("\t"):
                match = fieldPattern.search(line)
                if match is None:
                    raise ValueError(
                        "malformed lookup entry for " + repr(wanted) + ": " + repr(line)
                    )
                hits.append(
                    {
                        "method": match.group(1),
                        "hit_id": match.group(2),
                        "hit_value": float(match.group(3)),
                        "hit_from": int(match.group(4)),
                        "hit_to": int(match.group(5)),
                        "hit_order": _parseLookupFlag(match.group(6)),
                    }
                )
            elif found:
                # first line after the entry's hit block: nothing more to read
                break

    return found, hits


def predictSequence(args, hpoGraph, uni2hpoDict, name="Sequence", seq=""):
    """Predict HPO terms for one protein sequence and write the results.

    Steps, in order:
      1. If ``args.lookupdb`` is set, try to load precalculated hits for
         ``name`` from that file.
      2. Otherwise (or if the name is not in the file) collect hits from
         ``blast.Blast.localBlast`` and ``hhblits.HHBLITS.localHHBLITS``.
      3. With ``args.fast``, keep only the six hits with the smallest
         ``hit_value``.
      4. Attach the HPO terms from ``uni2hpoDict`` to each hit; hits without
         an entry are reported with a warning and left out of the merge.
      5. Merge one sub graph per hit into a graph seeded with the root(s),
         run ``predictor.Predictor(args.neuronalNet)`` on it and mark the
         root(s) as accepted.
      6. Write one output line per node returned by
         ``graph.getAcceptedNodes(args.minimalConfidence)`` and, with
         ``args.createSvgImage``, an SVG file named ``<name>.svg``.

    Args:
        args: parsed options; the attributes read here are lookupdb,
            blastDbFile, blastMinEVal, hhblitsDbFile, fast, neuronalNet,
            minimalConfidence and createSvgImage.
        hpoGraph: graph object providing getRoot() and getHpoSubGraph().
        uni2hpoDict: mapping from a hit's ``hit_id`` to its HPO terms.
        name: identifier of the sequence, used for the lookup, the output
            lines and the SVG file name.
        seq: the sequence to predict.

    Returns:
        None. Results are emitted through ``out``.

    Raises:
        SystemExit: any exception during the prediction is logged through
            ``out.writeError`` and the process exits with status 1.
    """
    try:
        out.writeLog('Predict function for protein: id: "' + str(name) + '" sequence: "' + str(seq) + '"')

        # use precalculated results if a lookup file is configured
        foundInLookUp, hits = False, []
        if args.lookupdb:
            out.writeLog("Checking for precalculated results!")
            foundInLookUp, hits = _loadLookupHits(args.lookupdb, name)

        # nothing precalculated: search for similar sequences
        if not foundInLookUp:
            out.writeLog("Check blast and hhblits for sequence orthologs!")
            blastResults = blast.Blast.localBlast(seq=seq, database=args.blastDbFile, minEVal=args.blastMinEVal)
            for hit in blastResults.hits:
                out.writeDebug("Blast: found hit: " + str(hit))
            hhblitsResults = hhblits.HHBLITS.localHHBLITS(seq=str(seq), database=args.hhblitsDbFile)
            for hit in hhblitsResults.hits:
                out.writeDebug("hhblits: found hit: " + str(hit))
            hits.extend(blastResults.hits)
            hits.extend(hhblitsResults.hits)

        # fast mode: keep only the six hits with the smallest hit_value
        if args.fast:
            out.writeLog("Reduce hits for faster prediction!")
            hits = sorted(hits, key=lambda t: t["hit_value"])[:6]

        # attach the HPO terms to every hit
        # (the terms are deliberately not logged, the output can get huge)
        out.writeLog("uniprot ids ({}) 2 HPO Terms".format(len(hits)))
        for hit in hits:
            try:
                hit["hpoTerms"] = uni2hpoDict[hit["hit_id"]]
            except KeyError:
                out.writeWarning("MISSING HPO TERMS FOR HIT: " + str(hit))

        # build and merge the sub graphs of all hits
        out.writeLog("Build and merge tree for similar sequences!")
        graph, hit_id = hpoGraph.getHpoSubGraph(hpoGraph.getRoot()), 0
        for hit in hits:
            # A hit that only produced the warning above has no terms to
            # merge; reading hit["hpoTerms"] would abort the whole run.
            if "hpoTerms" not in hit:
                continue
            subtree = hpoGraph.getHpoSubGraph(hit["hpoTerms"], {hit_id: hit})
            hit_id += 1
            graph += subtree

        # run the prediction on the merged graph
        out.writeLog("Run main prediction!")
        p = predictor.Predictor(args.neuronalNet)
        p.runprediction(seq, graph)

        # the root is always accepted
        for root in hpoGraph.getRoot():
            graph.getHpoTermById(root).accepted = 1

        # write one tab-separated line per accepted node
        out.writeLog("writing output")
        for node in graph.getAcceptedNodes(args.minimalConfidence):
            out.writeOutput("{}\t{}\t{}".format(name, node.id, "%.*f" % (2, (node.accepted + 2) / 4)))

        # optional svg rendering of the result graph
        if args.createSvgImage:
            out.writeLog("Create a svg image showing all results!")
            if graph is not None:
                graph.writeSvgImage(fileName=str(name) + ".svg")
            else:
                out.writeWarning("Can't create a svg image from an empty tree!")

    except Exception as err:
        # Top-level boundary for one prediction: report and terminate.
        exc_tb = sys.exc_info()[2]
        out.writeError("Predicting Error: " + str(err) + " on line: " + str(exc_tb.tb_lineno))
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. when running with -S or frozen).
        sys.exit(1)