def get_predictions(self, sequence, positions):
    """Compute SVM outputs for the given positions of one sequence.

    Each position is shifted by ``self.offset``; the window cut out of
    ``sequence`` starts ``self.window_left`` items before the shifted
    position and ends ``self.window_right + 2`` items after it.  All
    windows are put into one ``StringCharFeatures(DNA)`` object, the kernel
    is initialised with ``self.traindat`` and those features, and
    ``self.svm`` is evaluated once.

    No bounds check is made: for a ``str`` or ``list`` a negative slice
    start is interpreted relative to the end of the sequence, and slices
    running past the end are silently shortened.

    NOTE(review): a second definition of this method with the same name
    follows in the file; if both live in the same class the later one is
    the one that takes effect -- confirm and drop one.

    Args:
        sequence: Sliceable sequence the windows are taken from
            (presumably a DNA string -- confirm against the caller).
        positions: Iterable of candidate positions.

    Returns:
        The result of ``self.svm.classify().get_labels()``, presumably one
        output per window in the order of ``positions``.
    """
    left = self.window_left
    right = self.window_right + 2

    # Apply the offset first, then slice around the shifted position.
    shifted = (pos - self.offset for pos in positions)
    windows = [sequence[i - left:i + right] for i in shifted]

    test_features = StringCharFeatures(DNA)
    test_features.set_string_features(windows)

    # The kernel must be re-initialised against the new right-hand side
    # before the classifier is asked for outputs.
    self.wd_kernel.init(self.traindat, test_features)
    labels = self.svm.classify().get_labels()
    sys.stderr.write("\n...done...\n")
    return labels
def get_predictions(self, sequence, positions):
    """Compute SVM outputs for the given positions of one sequence.

    Each position is shifted by ``self.offset``; the window cut out of
    ``sequence`` starts ``self.window_left`` items before the shifted
    position and ends ``self.window_right + 2`` items after it.  All
    windows are put into one ``StringCharFeatures(DNA)`` object, the kernel
    is initialised with ``self.traindat`` and those features, and
    ``self.svm`` is evaluated once.

    No bounds check is made: for a ``str`` or ``list`` a negative slice
    start is interpreted relative to the end of the sequence, and slices
    running past the end are silently shortened.

    NOTE(review): a definition of this method with the same name also
    appears earlier in the file; if both live in the same class this later
    one is the one that takes effect -- confirm and drop one.

    Args:
        sequence: Sliceable sequence the windows are taken from
            (presumably a DNA string -- confirm against the caller).
        positions: Iterable of candidate positions.

    Returns:
        The result of ``self.svm.classify().get_labels()``, presumably one
        output per window in the order of ``positions``.
    """
    left = self.window_left
    right = self.window_right + 2

    # Apply the offset first, then slice around the shifted position.
    shifted = (pos - self.offset for pos in positions)
    windows = [sequence[i - left:i + right] for i in shifted]

    test_features = StringCharFeatures(DNA)
    test_features.set_string_features(windows)

    # The kernel must be re-initialised against the new right-hand side
    # before the classifier is asked for outputs.
    self.wd_kernel.init(self.traindat, test_features)
    labels = self.svm.classify().get_labels()
    sys.stderr.write("\n...done...\n")
    return labels
def get_predictions_from_seqdict(self, seqdic, site):
    """Score all candidate positions of all sequences with one SVM call.

    A single large test-features object is built from the windows of every
    position in every record of ``seqdic``; per the original author this
    is necessary to compute the splice outputs efficiently (fast, low
    memory).  The outputs are then distributed back to the records: each
    record's ``preds[site]`` receives, via ``set_scores``, as many
    consecutive outputs as it has positions, in iteration order.

    Windows are cut as in ``get_predictions``-style slicing: the position
    is shifted by ``self.offset`` and the slice runs from
    ``self.window_left`` items before to ``self.window_right + 2`` items
    after the shifted position.  No bounds check is made.

    Args:
        seqdic: Iterable of records exposing ``seq`` and ``preds``.  It is
            iterated more than once, so it must be re-iterable.
        site: Key used to select the prediction object in each record's
            ``preds``.

    Returns:
        None.  Results are stored through ``preds[site].set_scores``.
    """
    left = self.window_left
    right = self.window_right + 2

    # Collect the windows of every record into one flat list so the kernel
    # is initialised only once.
    windows = []
    for entry in seqdic:
        sequence = entry.seq
        for pos in entry.preds[site].positions:
            i = pos - self.offset
            windows.append(sequence[i - left:i + right])

    test_features = StringCharFeatures(DNA)
    test_features.set_string_features(windows)
    self.wd_kernel.init(self.traindat, test_features)
    labels = self.svm.classify().get_labels()
    sys.stderr.write("\n...done...\n")

    # Hand the flat output vector back record by record; ``cursor`` marks
    # where the current record's outputs start.
    cursor = 0
    for entry in seqdic:
        pred = entry.preds[site]
        count = len(pred.positions)
        pred.set_scores([labels[cursor + j] for j in range(count)])
        cursor += count