Esempio n. 1
0
    def predictSequence(args, hpoGraph, uni2hpoDict, name="Sequence", seq=""):
        """Predict HPO terms for one protein sequence and write them with ``out``.

        Processing steps:
          1. If ``args.lookupdb`` is set, look for precalculated hits stored under
             a line equal to ``name``; otherwise (or if nothing is found) run
             blast and hhblits on ``seq``.
          2. If ``args.fast`` is set, keep only the six hits with the smallest
             ``hit_value``.
          3. Attach the HPO terms of every hit from ``uni2hpoDict``.
          4. Merge one sub graph per annotated hit into a graph rooted at the
             root(s) of ``hpoGraph``.
          5. Run the predictor on the merged graph and write every accepted
             node as ``name<TAB>node id<TAB>score``.
          6. Optionally write an svg image named ``<name>.svg``.

        Args:
            args: parsed options; the attributes read here are ``lookupdb``,
                ``blastDbFile``, ``blastMinEVal``, ``hhblitsDbFile``, ``fast``,
                ``neuronalNet``, ``minimalConfidence`` and ``createSvgImage``.
            hpoGraph: the full HPO graph used to build the sub graphs.
            uni2hpoDict: mapping from a hit id to its HPO terms.
            name: identifier of the sequence, used for lookup and output.
            seq: the sequence to predict.

        Any exception is reported through ``out.writeError`` and terminates the
        process with exit status 1.
        """
        try:
            # debug msg
            out.writeLog('Predict function for protein: id: "' + str(name) + '" sequence: "' + str(seq) + '"')

            # lookup results if available
            foundInLookUp, hits = False, []
            if args.lookupdb:
                out.writeLog("Checking for precalculated results!")
                resultLine = re.compile("\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)")
                # the context manager closes the file even if a line fails to parse
                with open(args.lookupdb, "r") as lookupFile:
                    for line in lookupFile:
                        if line.strip() == name.strip():
                            # header line of the precalculated entry
                            foundInLookUp = True
                        elif foundInLookUp and line.startswith("\t"):
                            # tab-indented lines after the header belong to the entry
                            m = resultLine.search(line)
                            if m is None:
                                raise ValueError("Malformed lookup result line: " + repr(line))
                            # bool() of any non-empty string is True (the last group
                            # also captures the trailing newline), so the flag is
                            # parsed from its text instead.
                            # NOTE(review): assumes false is stored as one of the
                            # tokens below - confirm against the lookup db writer.
                            orderText = m.group(6).strip().lower()
                            hits.append(
                                {
                                    "method": m.group(1),
                                    "hit_id": m.group(2),
                                    "hit_value": float(m.group(3)),
                                    "hit_from": int(m.group(4)),
                                    "hit_to": int(m.group(5)),
                                    "hit_order": orderText not in ("", "0", "false", "none"),
                                }
                            )
                        elif foundInLookUp:
                            # first line that is not part of the entry ends it
                            break

            # nothing precalculated: search for similar sequences
            if not foundInLookUp:
                out.writeLog("Check blast and hhblits for sequence orthologs!")
                blastResults = blast.Blast.localBlast(seq=seq, database=args.blastDbFile, minEVal=args.blastMinEVal)
                for hit in blastResults.hits:
                    out.writeDebug("Blast: found hit: " + str(hit))
                hhblitsResults = hhblits.HHBLITS.localHHBLITS(seq=str(seq), database=args.hhblitsDbFile)
                for hit in hhblitsResults.hits:
                    out.writeDebug("hhblits: found hit: " + str(hit))
                hits.extend(blastResults.hits)
                hits.extend(hhblitsResults.hits)

            # reduce hits if fast prediction
            if args.fast:
                out.writeLog("Reduce hits for faster prediction!")
                hits = sorted(hits, key=lambda t: t["hit_value"])[:6]

            # now get the hpo-Identifiers for each similar sequence
            out.writeLog("uniprot ids ({}) 2 HPO Terms".format(len(hits)))
            annotatedHits = []
            for hitIndex, hit in enumerate(hits):
                try:
                    # the terms are deliberately not logged, the output may be huge
                    hit.update({"hpoTerms": uni2hpoDict[hit["hit_id"]]})
                except KeyError:
                    out.writeWarning("MISSING HPO TERMS FOR HIT: " + str(hit))
                else:
                    annotatedHits.append((hitIndex, hit))

            # build and merge trees
            out.writeLog("Build and merge tree for similar sequences!")
            graph = hpoGraph.getHpoSubGraph(hpoGraph.getRoot())
            # only hits that received HPO terms are merged; hits that were warned
            # about above have no "hpoTerms" entry and would raise a KeyError here.
            # Each hit keeps its position in `hits` as its id.
            for hitIndex, hit in annotatedHits:
                graph += hpoGraph.getHpoSubGraph(hit["hpoTerms"], {hitIndex: hit})

            # do the prediction
            out.writeLog("Run main prediction!")
            p = predictor.Predictor(args.neuronalNet)
            p.runprediction(seq, graph)
            # always accept the root
            for root in hpoGraph.getRoot():
                graph.getHpoTermById(root).accepted = 1

            # do the output
            out.writeLog("writing output")
            for node in graph.getAcceptedNodes(args.minimalConfidence):
                # divide by a float: with the integer 1 assigned to the roots above,
                # "/ 4" would truncate to 0 under Python 2
                score = (node.accepted + 2) / 4.0
                out.writeOutput("{}\t{}\t{}".format(name, node.id, "%.2f" % score))

            # svg image desired?
            if args.createSvgImage:
                out.writeLog("Create a svg image showing all results!")
                if graph is not None:
                    graph.writeSvgImage(fileName=str(name) + ".svg")
                else:
                    out.writeWarning("Can't create a svg image from an empty tree!")

            # NOTE: attributes set on the nodes are not cleared here; the
            # hpoGraph.clearAttr() call that used to follow is disabled.

        except Exception as err:
            exc_tb = sys.exc_info()[2]
            out.writeError("Predicting Error: " + str(err) + " on line: " + str(exc_tb.tb_lineno))
            sys.exit(1)
Esempio n. 2
0
 def __init__(self, hpoFile="../data/hp.obo"):
   """Initialise an hpo graph from an hpo file.

   The file is read stanza by stanza, a new stanza starting at every line
   that begins with "[Term]". Lines before the first "[Term]" are stored as
   attributes on this object (repeated names are collected in a list). Every
   other stanza becomes an HpoTerm stored in ``self.hpoTermsDict`` under the
   first space-separated token of its id, unless it contains an
   "is_obsolete:" line. Afterwards every term is appended to the
   ``childrens`` list of each parent named in its ``is_a`` attribute.

   hpoFile -- path of the file to parse. If None, nothing is parsed, the
              graph stays empty and ``self.isSubTree`` is True.

   Errors raised while reading or parsing are reported with out.writeError
   and not re-raised; terms parsed up to that point are kept. A parent id
   that is missing from ``self.hpoTermsDict`` raises a KeyError.
   """
   # init main class variables
   self.hpoTermsDict = {}
   self.isSubTree = hpoFile is None
   # if the file to parse is None, an empty HpoGraph will be returned
   if hpoFile is None:
     return
   # debug message
   out.writeDebug( "parsing hpo file " + str( hpoFile ) )
   # helper function to analyse the lines
   def _analyseLines(lines):
     """ Analyse the collected lines of one stanza (helper function) """
     # nothing collected (file starts with "[Term]" or is empty): indexing
     # lines[0] would raise an IndexError and abort the whole parsing
     if not lines:
       return
     # file descriptor or hp term?
     if lines[0].startswith( "[Term]" ):
       # do nothing, if HpoTerm is_obsolete
       if any( line.startswith('is_obsolete:') for line in lines ):
         return
       # add a hpoterm by the hpoterms description
       term = HpoTerm( lines[1:] )
       self.hpoTermsDict[ term.id.split(" ")[0] ] = term
       return
     for line in lines:
       # split name and value at the first ":"
       colonPos = line.find(":")
       attrName = line[:colonPos].strip()
       attrVal = line[colonPos+1:].strip()
       # now add this as attribute, repeated names become a list
       if not hasattr(self, attrName):
         setattr(self, attrName, attrVal)
       elif isinstance(getattr(self, attrName), list):
         getattr(self, attrName).append( attrVal )
       else:
         setattr(self, attrName, [ getattr(self, attrName), attrVal ])
   # ok, parse the lines in the file
   try:
     # open() instead of file(): file() does not exist in Python 3; the
     # with statement closes the file even if parsing raises
     with open( hpoFile, "r" ) as f:
       lines = []
       for line in f:
         # skip empty lines
         if line.strip() == "":
           continue
         # a "[Term]" line finishes the previous stanza and starts a new one
         if line.startswith( "[Term]" ):
           _analyseLines(lines)
           lines = [ line ]
         else:
           lines.append(line)
       # do not forget the last stanza
       _analyseLines(lines)
   except Exception as e:
     # str(e) instead of e.message: the attribute does not exist in Python 3
     out.writeError("Error parsing hpo file " + str( e ) + " " + str( e.args) )
   # good and now create the parent -> children relationships
   for key in self.hpoTermsDict:
     node = self.hpoTermsDict[key]
     if not hasattr(node, "is_a"):
       continue
     # is_a is either a single value or a list of values
     parents = node.is_a if isinstance(node.is_a, list) else [ node.is_a ]
     for parent in parents:
       self.hpoTermsDict[ parent.split(" ")[0] ].childrens.append(key)
Esempio n. 3
0
            exc_type, exc_obj, exc_tb = sys.exc_info()
            out.writeError("Predicting Error: " + str(err) + " on line: " + str(exc_tb.tb_lineno))
            exit(1)
        pass

    # print the header of the output
    out.writeOutput("AUTHOR TEAM_NAME")
    out.writeOutput("MODEL\t1")
    out.writeOutput("KEYWORDS clinical data, synteny.")

    # predict either the sequence given directly or every record of the fasta file
    if args.sequence is not None:
        predictSequence(args, hpoGraph, uni2hpoDict, seq=args.sequence)
    elif os.path.isfile(args.fastaFile):
        # "rU" is rejected by Python 3.11+, where plain text mode already
        # translates newlines; Python 2 still needs the "U" flag for that
        fastaMode = "r" if sys.version_info[0] >= 3 else "rU"
        # the with statement closes the file even if parsing raises
        with open(args.fastaFile, fastaMode) as f:
            for record in SeqIO.parse(f, "fasta"):
                predictSequence(args, hpoGraph, uni2hpoDict, name=record.id, seq=str(record.seq))
    else:
        out.writeError("Error: no sequence to predict given! (wrong path?)")

    out.writeOutput("END")

    # quit without error code
    exit(0)

except Exception as err:
    # main routine exception handler
    out.writeError("Unexpected Error: " + str(err))
    exit(1)