def mkdtst_test(fname, window, answer):
    """Yield a labelled test dataset for every entry of a PSSM file.

    The mode is selected by ``answer``:

    * ``answer`` is not None: entries are read with
      ``parse_pssm4pos(fname)`` as ``(idch, start, pssm)`` and each
      position is labelled 1 when ``util.ans(answer).isans(idch,
      start + pos)`` is truthy, otherwise -1.
    * ``answer`` is None (negative dataset): entries are read with
      ``parse_pssm(fname)`` as ``(idch, pssm)`` and every position is
      labelled -1.

    Yields:
        ``(idch, {pos: (label, vector)})`` where ``idch`` is the stripped
        identifier, ``pos`` is the index into ``pssm`` (not shifted by
        ``start``) and ``vector`` is ``mkvec(pssm, window, pos)``.

    Example (needs the project's fixture files)::

        >>> list(mkdtst_test('.test.pssm', 10, answer='./.answer.test'))  # doctest: +SKIP
    """
    if answer is None:
        # No answer set available: every position is a negative sample.
        for raw_id, profile in parse_pssm(fname):
            entry_id = raw_id.strip()
            negatives = {}
            for idx in range(len(profile)):
                negatives[idx] = (-1, mkvec(profile, window, idx))
            yield entry_id, negatives
        return

    ans_sgr = util.ans(answer)

    def to_label(entry_id, position):
        """Return 1 for an answer position, -1 otherwise."""
        if ans_sgr.isans(entry_id, position):
            return 1
        return -1

    for raw_id, start, profile in parse_pssm4pos(fname):
        entry_id = raw_id.strip()
        # Original author's note (2012/1/31): the answer lookup was switched
        # from pos to pos + start, and was marked as still being modified.
        labelled = {}
        for idx in range(len(profile)):
            label = to_label(entry_id, start + idx)
            labelled[idx] = (label, mkvec(profile, window, idx))
        yield entry_id, labelled
def getpos_fasta(fname, answer):
    """Yield, per FASTA entry, the residues located at answer positions.

    Entries are read with ``seq2feature.fasta2seq(fname)`` as
    ``(idch, start, seq)``.  A residue is kept when
    ``util.ans(answer).isans(idch, start + pos)`` is truthy, where ``pos``
    is the residue's index in ``seq``.

    Args:
        fname: passed unchanged to ``seq2feature.fasta2seq``.
        answer: passed unchanged to ``util.ans``.

    Yields:
        ``(idch, {pos + start: residue})`` where ``idch`` is the stripped
        identifier.  Unlike the index-keyed dataset builders, the keys here
        are already shifted by ``start``.

    Example (needs the project's dataset files)::

        >>> fname = "../dataset/mono.d4.0.nxg.miss.fasta"
        >>> answer = "../dataset/answer_mono.d4.0.nxg.txt"
        >>> next(getpos_fasta(fname, answer))  # doctest: +SKIP
    """
    ans_sgr = util.ans(answer)
    for idch, start, seq in seq2feature.fasta2seq(fname):
        idch = idch.strip()
        # Keep only the residues flagged as answers for this entry.
        yield idch, {
            pos + start: res
            for pos, res in enumerate(seq)
            if ans_sgr.isans(idch, start + pos)
        }
def mkdtst_test(self, window, answer):
    """Yield the full labelled test dataset for every parsed PSSM entry.

    Entries come from ``self.parse_pssm4pos()`` as
    ``(idch, start, v_pssm)``.  Every position of ``v_pssm`` is included.

    Args:
        window: passed unchanged to ``self.mkvec``.
        answer: passed unchanged to ``util.ans``.

    Yields:
        ``(idch, idchs, label, dataset)`` where

        * ``idch`` is the stripped identifier,
        * ``idchs`` is ``[(idch, pos + start), ...]``,
        * ``label`` holds 1 where ``isans(idch, start + pos)`` is truthy
          and -1 elsewhere,
        * ``dataset`` holds ``self.mkvec(v_pssm, window, pos)``.

        The three lists are index-aligned.
    """
    ans_sgr = util.ans(answer)

    def to_label(entry_id, position):
        """Return 1 for an answer position, -1 otherwise."""
        if ans_sgr.isans(entry_id, position):
            return 1
        return -1

    for raw_id, start, profile in self.parse_pssm4pos():
        entry_id = raw_id.strip()
        n_positions = len(profile)

        residue_ids = []
        for idx in range(n_positions):
            residue_ids.append((entry_id, idx + start))

        vectors = []
        for idx in range(n_positions):
            vectors.append(self.mkvec(profile, window, idx))

        labels = []
        for idx in range(n_positions):
            labels.append(to_label(entry_id, start + idx))

        yield entry_id, residue_ids, labels, vectors
def mkdtst_test(fname, window, ftr, answer):
    """Yield a labelled test dataset for every sequence of a FASTA file.

    Sequences are read with ``fasta2seq(fname)`` as ``(idch, start, seq)``.
    Each position is labelled 1 when ``util.ans(answer).isans(idch,
    start + pos)`` is truthy, otherwise -1.

    Args:
        fname: passed unchanged to ``fasta2seq``.
        window: passed unchanged to ``mkvec``.
        ftr: feature callable handed to ``mkvec`` as its first argument.
        answer: passed unchanged to ``util.ans``.

    Yields:
        ``(idch, {pos: (label, vector)})`` where ``idch`` is the stripped
        identifier, ``pos`` is the index into ``seq`` (not shifted by
        ``start``) and ``vector`` is ``mkvec(ftr, seq, window, pos)``.

    Example (needs the project's ``feature`` module and fixture files)::

        >>> import feature  # doctest: +SKIP
        >>> list(mkdtst_test('.test.fasta', 10, lambda seq: feature.seq2frq(seq), answer='../dataset/answer_monod4.0.cluster1.txt'))  # doctest: +SKIP
    """
    ans_sgr = util.ans(answer)

    def to_label(entry_id, position):
        """Return 1 for an answer position, -1 otherwise."""
        if ans_sgr.isans(entry_id, position):
            return 1
        return -1

    for raw_id, start, sequence in fasta2seq(fname):
        entry_id = raw_id.strip()
        # Original author's note (2012/1/31): the answer lookup was switched
        # from pos to pos + start, and was marked as still being modified.
        labelled = {}
        for idx in range(len(sequence)):
            label = to_label(entry_id, start + idx)
            labelled[idx] = (label, mkvec(ftr, sequence, window, idx))
        yield entry_id, labelled
def mkdtst_test4svr(fname, window, answer):
    """Yield a test dataset whose targets are answer distances.

    Entries are read with ``parse_pssm4pos(fname)`` as
    ``(idch, start, pssm)``.  Instead of a +1/-1 label, each position is
    paired with ``util.ans(answer).get_dist(start + pos, idch)``.

    Args:
        fname: passed unchanged to ``parse_pssm4pos``.
        window: passed unchanged to ``mkvec``.
        answer: passed unchanged to ``util.ans``.

    Yields:
        ``(idch, {pos: (distance, vector)})`` where ``idch`` is the
        stripped identifier, ``pos`` is the index into ``pssm`` (not
        shifted by ``start``) and ``vector`` is
        ``mkvec(pssm, window, pos)``.

    Example (needs the project's fixture files)::

        >>> list(mkdtst_test4svr('.test.pssm', 10, answer='./.answer.test'))  # doctest: +SKIP
    """
    ans_sgr = util.ans(answer)
    for raw_id, start, profile in parse_pssm4pos(fname):
        entry_id = raw_id.strip()
        # Original author's note (2012/1/31): the answer lookup was switched
        # from pos to pos + start, and was marked as still being modified.
        targets = {}
        for idx in range(len(profile)):
            # Note the argument order: get_dist takes (position, id).
            distance = ans_sgr.get_dist(start + idx, entry_id)
            targets[idx] = (distance, mkvec(profile, window, idx))
        yield entry_id, targets
def mkdtst_train(self, window, answer):
    """Yield the positive training dataset for every parsed PSSM entry.

    Entries come from ``self.parse_pssm4pos()`` as
    ``(idch, start, v_pssm)``.  Only positions for which
    ``util.ans(answer).isans(idch, start + pos)`` is truthy are kept, so
    every label is 1.

    Args:
        window: passed unchanged to ``self.mkvec``.
        answer: passed unchanged to ``util.ans``.

    Yields:
        ``(idch, idchs, label, dataset)`` where

        * ``idch`` is the stripped identifier,
        * ``idchs`` is ``[(idch, pos + start), ...]`` for the kept
          positions,
        * ``label`` is a list of 1s, one per kept position,
        * ``dataset`` holds ``self.mkvec(v_pssm, window, pos)`` for the
          kept positions.

        The three lists are index-aligned: ``idchs`` describes exactly the
        positions present in ``label`` and ``dataset``.
    """
    ans_sgr = util.ans(answer)
    for idch, start, v_pssm in self.parse_pssm4pos():
        idch = idch.strip()
        # Select the answer positions once so that all three output lists
        # are built from the same positions and stay index-aligned.
        positive = [
            pos for pos in range(len(v_pssm))
            if ans_sgr.isans(idch, start + pos)
        ]
        idchs = [(idch, pos + start) for pos in positive]
        dataset = [self.mkvec(v_pssm, window, pos) for pos in positive]
        # Every kept position is an answer, hence always the positive label.
        label = [1] * len(positive)
        yield idch, idchs, label, dataset
def mkdtst_neg(self, window, answer, size=1):
    """Yield a randomly sampled negative dataset for every PSSM entry.

    Entries come from ``self.parse_pssm4pos()`` as ``(idch, start, pssm)``.
    Positions for which ``util.ans(answer).isans(idch, pos + start)`` is
    falsy are shuffled with ``random.shuffle`` and the first
    ``size * len(ans_sgr.get_pos(idch))`` of them are kept.

    The identifier is stripped *before* it is used for the ``isans`` and
    ``get_pos`` lookups, so surrounding whitespace in the parsed identifier
    cannot make those lookups miss.

    Args:
        window: passed unchanged to ``self.mkvec``.
        answer: passed unchanged to ``util.ans``.
        size: multiplier applied to ``len(ans_sgr.get_pos(idch))`` to get
            the number of negatives sampled per entry (default 1).

    Yields:
        ``(idch, idchs, label, dataset)`` where ``idchs`` is
        ``[(idch, pos + start), ...]``, ``label`` is a list of -1 and
        ``dataset`` holds ``self.mkvec(pssm, window, pos)``; all three are
        index-aligned over the sampled positions.
    """
    ans_sgr = util.ans(answer)
    for idch, start, pssm in self.parse_pssm4pos():
        idch = idch.strip()
        # NOTE(review): the candidate range starts at 1, so index 0 is never
        # sampled as a negative -- confirm whether this is intentional.
        candidates = [
            i for i in range(1, len(pssm))
            if not ans_sgr.isans(idch, i + start)
        ]
        random.shuffle(candidates)
        selected_pos = candidates[:size * len(ans_sgr.get_pos(idch))]
        idchs = [(idch, pos + start) for pos in selected_pos]
        dataset = [self.mkvec(pssm, window, pos) for pos in selected_pos]
        label = [-1] * len(selected_pos)
        yield idch, idchs, label, dataset
def mkdtst_near(fname, window, ftr, answer, low, up):
    """Yield labelled positions that are answers or lie near an answer.

    Sequences are read with ``fasta2seq(fname)`` as ``(idch, start, seq)``.
    A position is kept when
    ``low <= ans_sgr.get_dist(pos + start, idch) <= up`` or when
    ``ans_sgr.isans(idch, start + pos)`` is truthy, with
    ``ans_sgr = util.ans(answer)``.

    Args:
        fname: passed unchanged to ``fasta2seq``.
        window: passed unchanged to ``mkvec``.
        ftr: feature callable handed to ``mkvec`` as its first argument.
        answer: passed unchanged to ``util.ans``.
        low: inclusive lower bound on the distance.
        up: inclusive upper bound on the distance.

    Yields:
        ``(idch, {pos: (label, vector)})`` where ``idch`` is the stripped
        identifier, ``pos`` is the index into ``seq`` (not shifted by
        ``start``), ``label`` is 1 for answer positions and -1 otherwise,
        and ``vector`` is ``mkvec(ftr, seq, window, pos)``.
    """
    ans_sgr = util.ans(answer)

    def to_label(entry_id, position):
        """Return 1 for an answer position, -1 otherwise."""
        if ans_sgr.isans(entry_id, position):
            return 1
        return -1

    for raw_id, start, sequence in fasta2seq(fname):
        entry_id = raw_id.strip()
        # Original author's note (2012/1/31): the answer lookup was switched
        # from pos to pos + start, and was marked as still being modified.
        selected = {}
        for idx in range(len(sequence)):
            in_band = low <= ans_sgr.get_dist(idx + start, entry_id) <= up
            if not (in_band or ans_sgr.isans(entry_id, start + idx)):
                continue
            label = to_label(entry_id, start + idx)
            selected[idx] = (label, mkvec(ftr, sequence, window, idx))
        yield entry_id, selected
def mkdtst_train(fname, window, answer):
    """Yield the positive training dataset for every entry of a PSSM file.

    Entries are read with ``parse_pssm4pos(fname)`` as
    ``(idch, start, pssm)``.  Only positions for which
    ``util.ans(answer).isans(idch, start + pos)`` is truthy (the original
    comment calls these binding sites) are kept.

    Args:
        fname: passed unchanged to ``parse_pssm4pos``.
        window: passed unchanged to ``mkvec``.
        answer: passed unchanged to ``util.ans``.

    Yields:
        ``(idch, {pos: (label, vector)})`` where ``idch`` is the stripped
        identifier, ``pos`` is the index into ``pssm`` (not shifted by
        ``start``), ``label`` is 1 when ``isans`` is truthy else -1, and
        ``vector`` is ``mkvec(pssm, window, pos)``.

    Example (needs the project's fixture files)::

        >>> list(mkdtst_train('.test.pssm', 10, answer='.answer.test'))  # doctest: +SKIP
    """
    ans_sgr = util.ans(answer)

    def to_label(entry_id, position):
        """Return 1 for an answer position, -1 otherwise."""
        if ans_sgr.isans(entry_id, position):
            return 1
        return -1

    for raw_id, start, profile in parse_pssm4pos(fname):
        entry_id = raw_id.strip()
        # Original author's note (2012/1/31): the answer lookup was switched
        # from pos to pos + start, and was marked as still being modified.
        positives = {}
        for idx in range(len(profile)):
            if not ans_sgr.isans(entry_id, start + idx):
                continue
            label = to_label(entry_id, start + idx)
            positives[idx] = (label, mkvec(profile, window, idx))
        yield entry_id, positives
def mkdtst_near(fname, window, answer, low, up):
    """Yield labelled PSSM positions that are answers or lie near one.

    Entries are read with ``parse_pssm4pos(fname)`` as
    ``(idch, start, pssm)``.  A position is kept when
    ``low <= ans_sgr.get_dist(pos + start, idch) <= up`` or when
    ``ans_sgr.isans(idch, start + pos)`` is truthy, with
    ``ans_sgr = util.ans(answer)``.

    Args:
        fname: passed unchanged to ``parse_pssm4pos``.
        window: passed unchanged to ``mkvec``.
        answer: passed unchanged to ``util.ans``.
        low: inclusive lower bound on the distance.
        up: inclusive upper bound on the distance.

    Yields:
        ``(idch, {pos: (label, vector)})`` where ``idch`` is the stripped
        identifier, ``pos`` is the index into ``pssm`` (not shifted by
        ``start``), ``label`` is 1 for answer positions and -1 otherwise,
        and ``vector`` is ``mkvec(pssm, window, pos)``.
    """
    ans_sgr = util.ans(answer)

    def to_label(entry_id, position):
        """Return 1 for an answer position, -1 otherwise."""
        if ans_sgr.isans(entry_id, position):
            return 1
        return -1

    for raw_id, start, profile in parse_pssm4pos(fname):
        entry_id = raw_id.strip()
        # Original author's note (2012/1/31): the answer lookup was switched
        # from pos to pos + start, and was marked as still being modified.
        selected = {}
        for idx in range(len(profile)):
            in_band = low <= ans_sgr.get_dist(idx + start, entry_id) <= up
            if not (in_band or ans_sgr.isans(entry_id, start + idx)):
                continue
            label = to_label(entry_id, start + idx)
            selected[idx] = (label, mkvec(profile, window, idx))
        yield entry_id, selected
def mkdtst_train(fname, window, ftr, answer):
    """Yield the positive training dataset for every FASTA sequence.

    Sequences are read with ``fasta2seq(fname)`` as ``(idch, start, seq)``.
    Only positions for which ``util.ans(answer).isans(idch, start + pos)``
    is truthy (the original comment calls these binding sites) are kept.

    Args:
        fname: passed unchanged to ``fasta2seq``.
        window: passed unchanged to ``mkvec``.
        ftr: feature callable handed to ``mkvec`` as its first argument.
        answer: passed unchanged to ``util.ans``.

    Yields:
        ``(idch, {pos: (label, vector)})`` where ``idch`` is the stripped
        identifier, ``pos`` is the index into ``seq`` (not shifted by
        ``start``), ``label`` is 1 when ``isans`` is truthy else -1, and
        ``vector`` is ``mkvec(ftr, seq, window, pos)``.

    Example (needs the project's ``feature`` module and fixture files)::

        >>> import feature  # doctest: +SKIP
        >>> list(mkdtst_train('.test.fasta', 10, lambda seq: feature.seq2frq(seq), answer='../dataset/answer_monod4.0.cluster1.txt'))  # doctest: +SKIP
    """
    ans_sgr = util.ans(answer)

    def to_label(entry_id, position):
        """Return 1 for an answer position, -1 otherwise."""
        if ans_sgr.isans(entry_id, position):
            return 1
        return -1

    for raw_id, start, sequence in fasta2seq(fname):
        entry_id = raw_id.strip()
        # Original author's note (2012/1/31): the answer lookup was switched
        # from pos to pos + start, and was marked as still being modified.
        positives = {}
        for idx in range(len(sequence)):
            if not ans_sgr.isans(entry_id, start + idx):
                continue
            label = to_label(entry_id, start + idx)
            positives[idx] = (label, mkvec(ftr, sequence, window, idx))
        yield entry_id, positives
def make_dataset(self, window, answer):
    """Yield a dataset for every entry returned by ``self.parse_pssm()``.

    Entries arrive as ``(idch, v_pssm)``; positions are plain indices into
    ``v_pssm`` (no start offset is applied here).

    Args:
        window: passed unchanged to ``self.mkvec``.
        answer: ``None`` for unknown samples, in which case every label is
            0.  Otherwise it is passed to ``util.ans`` and a position is
            labelled 1 when ``isans(idch, pos)`` is truthy, else -1.

    Yields:
        ``(idch, idchs, label, dataset)`` where ``idch`` is the stripped
        identifier, ``idchs`` is ``[(idch, pos), ...]``, ``label`` is the
        list of labels and ``dataset`` holds
        ``self.mkvec(v_pssm, window, pos)``; the lists are index-aligned.
    """
    if answer is None:
        # Unknown sample: there is nothing to look the positions up in.
        def to_label(entry_id, position):
            return 0
    else:
        ans_sgr = util.ans(answer)

        def to_label(entry_id, position):
            if ans_sgr.isans(entry_id, position):
                return 1
            return -1

    for raw_id, profile in self.parse_pssm():
        entry_id = raw_id.strip()
        n_positions = len(profile)

        residue_ids = []
        for idx in range(n_positions):
            residue_ids.append((entry_id, idx))

        vectors = []
        for idx in range(n_positions):
            vectors.append(self.mkvec(profile, window, idx))

        labels = []
        for idx in range(n_positions):
            labels.append(to_label(entry_id, idx))

        yield entry_id, residue_ids, labels, vectors
def iter_idch2result(idch, name, window, c, g, fans, fname):
    """Yield per-position results for one identifier.

    The values come from ``iter_desc(name, window, c, g, idch)`` and are
    numbered from 0; each index is shifted by ``starts(fname)[idch]``.

    Args:
        idch: identifier used for the ``starts`` lookup, the ``isans``
            lookup and the ``iter_desc`` call (it is not stripped here).
        name, window, c, g: passed unchanged to ``iter_desc``.
        fans: passed unchanged to ``util.ans``.
        fname: passed unchanged to ``starts``.

    Yields:
        ``(position, is_answer, value)`` where ``position`` is the shifted
        index, ``is_answer`` is ``util.ans(fans).isans(idch, position)``
        and ``value`` is the item produced by ``iter_desc``.
    """
    sgr_ans = util.ans(fans)
    offset = starts(fname)[idch]
    values = iter_desc(name, window, c, g, idch)
    for index, value in enumerate(values):
        position = index + offset
        yield position, sgr_ans.isans(idch, position), value