Beispiel #1
0
 def localHHBLITS(seq = "NWLGVKRQPLWTLVLILWPVIIFIILAITRTKFPP", database = "../data/PP2db", minEVal = 1):
   """Run a local ``hhblits`` search and wrap its report in an HHBLITS object.

   seq      -- either a raw sequence (only the letters A-Z) or the name of an
               existing input file. A raw sequence is written to a temporary
               FASTA file in the current directory; anything else is handed
               to hhblits unchanged as the input file name.
   database -- value passed to hhblits via ``-d``.
   minEVal  -- value passed to hhblits via ``-e``.

   Returns ``HHBLITS(report_text)``. The text is empty when hhblits could not
   be started or finished with a non-zero exit status (both are logged).
   Temporary files created here are removed on every path.
   """
   import time
   out.writeDebug("Do a local hhblits search for {} in {}".format( seq, database ) )

   # Unique suffix so that parallel runs in the same directory do not collide.
   char_set = string.ascii_uppercase + string.digits
   time_stamp = str(int(time.time())) + ''.join(random.sample(char_set*6, 40)) + str(random.randint(0, 100000000))

   seq = str(seq)
   outfile = 'hhblits_'+time_stamp+'.out'
   wrote_input = bool(re.match('^[A-Z]*$', seq))
   if wrote_input:
     seq_file = 'hhblits_input_'+time_stamp
     with open(seq_file, 'w') as fh:
       fh.write('>no_header\n'+seq)
   else:
     seq_file = seq

   # Argument list instead of a shell string: file names containing spaces or
   # shell metacharacters are passed through verbatim and cannot be
   # interpreted as shell syntax. (No shell expansion such as "~" happens.)
   command = ["hhblits", "-i", seq_file, "-o", outfile, "-d", str(database), "-e", str(minEVal), "-n", "1"]

   hhblitsResults = ''
   try:
     try:
       subprocess.check_output(command, stderr=subprocess.STDOUT)
     except subprocess.CalledProcessError as err:
       out.writeLog("Return code for hhblits search {} in {} returned with exit code {}!".format( seq, database, err.returncode ) )
     except OSError as err:
       # Raised when the hhblits executable itself cannot be started.
       out.writeLog("Could not start hhblits search {} in {}: {}!".format( seq, database, err ) )
     else:
       with open(outfile) as result_fh:
         hhblitsResults = result_fh.read()
   finally:
     # Clean up even when hhblits failed or reading the report raised.
     if os.path.exists(outfile):
       os.remove(outfile)
     if wrote_input and os.path.exists(seq_file):
       os.remove(seq_file)
   return HHBLITS(hhblitsResults)
 def getHpoTermById(self, id, log = True):
   """Look up an HPO term in ``self.hpoTermsDict``.

   Only the part of ``id`` before the first space is used as the key.
   Returns the stored term, or None when the key is unknown; in that case a
   log line is written unless ``log`` is false.
   """
   key = id.split(" ")[0]
   try:
     term = self.hpoTermsDict[key]
   except KeyError:
     term = None
     if log:
       out.writeLog( "KeyError getting term for id: \"" + str( id ) + "\"! => returning None!" )
   return term
Beispiel #3
0
 def localBlast(seq="NWLGVKRQPLWTLVLILWPVIIFIILAITRTKFPP", database="../data/genes_UniProt.fasta", minEVal=1):
     """Run a local ``blast2`` protein search and wrap its output in a Blast object.

     seq      -- query sequence; its string form is fed to blast2 on stdin.
     database -- value passed to blast2 via ``-d``.
     minEVal  -- value passed to blast2 via ``-e``.

     Returns ``Blast(output_text)`` where the text is blast2's combined
     stdout/stderr with one trailing newline removed. A non-zero exit code is
     logged but the output is still passed on. If blast2 cannot be started at
     all, that is logged and an empty text is used.
     """
     # Function-scope import: the previously used ``commands`` module does not
     # exist on Python 3, and the file's import block is not touched here.
     import subprocess

     out.writeDebug("Do a local blast search for {} in {}".format(seq, database))

     # Argument list + stdin instead of `echo "<seq>" | blast2 ...` in a shell:
     # the sequence and database name can no longer be read as shell syntax.
     command = ["blast2", "-p", "blastp", "-d", str(database), "-N", "-e", str(minEVal), "-m", "7"]
     try:
         process = subprocess.Popen(
             command,
             stdin=subprocess.PIPE,
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT,
             universal_newlines=True,
         )
         # The trailing newline mirrors what `echo` appended to the sequence.
         blastOutput = process.communicate(str(seq) + "\n")[0]
         returnCode = process.returncode
     except OSError as err:
         out.writeLog("Could not start blast search {} in {}: {}!".format(seq, database, err))
         return Blast("")

     # Same post-processing the old helper applied: drop one trailing newline.
     if blastOutput[-1:] == "\n":
         blastOutput = blastOutput[:-1]

     if returnCode != 0:
         out.writeLog(
             "Return code for blast search {} in {} returned with exit code {}!".format(
                 seq, database, returnCode
             )
         )
     return Blast(blastOutput)
    def predictSequence(args, hpoGraph, uni2hpoDict, name="Sequence", seq=""):
        """Predict HPO terms for one sequence and emit them through ``out``.

        args        -- options object; the attributes read here are lookupdb,
                       blastDbFile, blastMinEVal, hhblitsDbFile, fast,
                       neuronalNet, minimalConfidence and createSvgImage.
        hpoGraph    -- full HPO graph used to build the per-hit sub graphs.
        uni2hpoDict -- maps a hit id to the HPO terms attached to that hit.
        name        -- identifier printed with every result and used as the
                       record key in the lookup file / SVG file name.
        seq         -- the query sequence.

        Writes one line "name<TAB>node id<TAB>score" per accepted node, where
        score is (node.accepted + 2) / 4 with two decimals. Returns None.
        Any exception is reported via ``out.writeError`` and terminates the
        process with exit status 1.
        """
        try:
            out.writeLog('Predict function for protein: id: "' + str(name) + '" sequence: "' + str(seq) + '"')

            # Look for precalculated hits. A record starts with a line equal to
            # `name`; the following lines that begin with a tab each carry six
            # tab-separated fields; the first other line ends the record.
            foundInLookUp, hits = False, []
            if args.lookupdb:
                out.writeLog("Checking for precalculated results!")
                # `with` closes the file even if parsing a line raises.
                with open(args.lookupdb, "r") as f:
                    for line in f:
                        if line.strip() == name.strip():
                            foundInLookUp = True
                        elif foundInLookUp and line.startswith("\t"):
                            m = re.search("\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)", line)
                            hits.append(
                                {
                                    "method": m.group(1),
                                    "hit_id": m.group(2),
                                    "hit_value": float(m.group(3)),
                                    "hit_from": int(m.group(4)),
                                    "hit_to": int(m.group(5)),
                                    # NOTE(review): bool() of any non-empty string is True and
                                    # group 6 also captures the line terminator, so this looks
                                    # like it is always True - confirm the lookup file format
                                    # before changing how the flag is parsed.
                                    "hit_order": bool(m.group(6)),
                                }
                            )
                        elif foundInLookUp:
                            break

            # Nothing precalculated: search for similar sequences.
            if not foundInLookUp:
                out.writeLog("Check blast and hhblits for sequence orthologs!")
                blastResults = blast.Blast.localBlast(seq=seq, database=args.blastDbFile, minEVal=args.blastMinEVal)
                for hit in blastResults.hits:
                    out.writeDebug("Blast: found hit: " + str(hit))
                hhblitsResults = hhblits.HHBLITS.localHHBLITS(seq=str(seq), database=args.hhblitsDbFile)
                for hit in hhblitsResults.hits:
                    out.writeDebug("hhblits: found hit: " + str(hit))
                hits.extend(blastResults.hits)
                hits.extend(hhblitsResults.hits)

            # Fast mode keeps only the six hits with the smallest hit_value.
            if args.fast:
                out.writeLog("Reduce hits for faster prediction!")
                hits = sorted(hits, key=lambda t: t["hit_value"])[:6]

            # Attach the HPO terms of every hit that has an entry.
            out.writeLog("uniprot ids ({}) 2 HPO Terms".format(len(hits)))
            for hit in hits:
                try:
                    # Not logged on purpose: this could be a huge amount of output.
                    hit.update({"hpoTerms": uni2hpoDict[hit["hit_id"]]})
                except KeyError:
                    out.writeWarning("MISSING HPO TERMS FOR HIT: " + str(hit))

            # Build one sub graph per hit and merge them into the root graph.
            out.writeLog("Build and merge tree for similar sequences!")
            graph, hit_id = hpoGraph.getHpoSubGraph(hpoGraph.getRoot()), 0
            for hit in hits:
                if "hpoTerms" not in hit:
                    # Already warned about above; without this guard a single
                    # unmapped hit raised KeyError and aborted the whole run.
                    continue
                subtree = hpoGraph.getHpoSubGraph(hit["hpoTerms"], {hit_id: hit})
                hit_id += 1
                graph += subtree

            # Run the predictor on the merged graph.
            out.writeLog("Run main prediction!")
            p = predictor.Predictor(args.neuronalNet)
            p.runprediction(seq, graph)
            # The root terms are always accepted.
            for root in hpoGraph.getRoot():
                graph.getHpoTermById(root).accepted = 1

            out.writeLog("writing output")
            for node in graph.getAcceptedNodes(args.minimalConfidence):
                # 4.0 forces true division; with the integer `accepted` set on
                # the root above, `/ 4` would truncate to 0 under Python 2.
                out.writeOutput("{}\t{}\t{}".format(name, node.id, "%.*f" % (2, (node.accepted + 2) / 4.0)))

            if args.createSvgImage:
                out.writeLog("Create a svg image showing all results!")
                if graph is not None:
                    graph.writeSvgImage(fileName=str(name) + ".svg")
                else:
                    out.writeWarning("Can't create a svg image from an empty tree!")

        except Exception as err:
            # Top-level boundary for one prediction: report and stop the program.
            exc_type, exc_obj, exc_tb = sys.exc_info()
            out.writeError("Predicting Error: " + str(err) + " on line: " + str(exc_tb.tb_lineno))
            sys.exit(1)
        help="The minimal confidance value an accepted node should have; [from -2 to 2] (default: 0.0)!",
    )
    # Boolean switch stored as args.fast (parser is presumably an
    # argparse.ArgumentParser created above this excerpt - confirm).
    parser.add_argument("--fast", action="store_true", dest="fast", help="Weather to perform a fast prediction!")
    args = parser.parse_args()

    # init output format
    # args.verbosity is read as a bit mask: bit i set -> the matching
    # out.supress* flag becomes True (0 message, 1 debug, 2 log, 3 warning,
    # 4 error, 5 output). `>>` binds tighter than `&`, so every expression
    # is (args.verbosity >> i) & 1.
    out.supressMessage = bool(args.verbosity >> 0 & 1)
    out.supressDebug = bool(args.verbosity >> 1 & 1)
    out.supressLog = bool(args.verbosity >> 2 & 1)
    out.supressWarning = bool(args.verbosity >> 3 & 1)
    out.supressError = bool(args.verbosity >> 4 & 1)
    out.supressOutput = bool(args.verbosity >> 5 & 1)
    out.outputFormat = args.outputFormat

    # init the hpoParser
    # Build the graph from args.hpoFile when that path is an existing file;
    # otherwise construct HpoGraph() without arguments.
    out.writeLog("Build hpoGraph from file")
    hpoGraph = None
    if os.path.isfile(args.hpoFile):
        hpoGraph = hpoParser.HpoGraph(hpoFile=args.hpoFile)
    else:
        out.writeLog("missing hpoFile! Try standard hpoFile in the data directory")
        hpoGraph = hpoParser.HpoGraph()

    # init the hpo-identifier dict
    # Each line of args.uni2hpo is split on tabs: field 0 is the key, field 1
    # is split on "," to give the list stored under that key. A line without
    # a tab raises IndexError; a repeated key overwrites the earlier entry.
    out.writeLog("Build uniprot 2 hpo dictionary")
    uni2hpoDict = {}
    f = open(args.uni2hpo, "r")
    for line in f:
        line = line.strip()
        uni2hpoDict.update({line.split("\t")[0]: line.split("\t")[1].split(",")})
    f.close()