def get_dataset(fname, k):
    """Build parallel label and feature lists from a FASTA file.

    Every ``(prot_id, seq)`` record yielded by ``seq2feature.parse_fasta(fname)``
    contributes its identifier as a label and ``k_spec(seq, k)`` as the
    matching feature entry.

    Args:
        fname: Path handed unchanged to ``seq2feature.parse_fasta``.
        k: Second argument forwarded to ``k_spec`` (presumably the k-mer
            length of a spectrum feature -- confirm against ``k_spec``).

    Returns:
        A ``(labels, features)`` tuple of two equally long lists, in the
        order the records were parsed.
    """
    # One streaming pass over the parser: each record's feature is computed
    # as soon as the record is read.
    pairs = [
        (prot_id, k_spec(seq, k))
        for prot_id, seq in seq2feature.parse_fasta(fname)
    ]
    labels = [pair[0] for pair in pairs]
    features = [pair[1] for pair in pairs]
    return labels, features
def split_fasta(fname, save_dir):
    """Write every record of a FASTA file into its own file under save_dir.

    For each ``(idch, seq)`` record yielded by
    ``seq2feature.parse_fasta(fname)`` a file ``<save_dir>/<name>.fasta`` is
    created holding a ``>`` header line with the stripped identifier and
    one line with the sequence.  ``<name>`` is the Base64 encoding of the
    stripped identifier with tab characters removed, with every "/" of the
    Base64 text replaced by "_".

    To recover the identifier from a file name, replace "_" with "/" and
    Base64-decode the result ("_" is not part of the standard Base64
    alphabet, so the mapping is unambiguous).

    Args:
        fname: Path handed unchanged to ``seq2feature.parse_fasta``.
        save_dir: Existing directory that receives the per-record files.

    Returns:
        None.  The function only writes files.
    """
    # NOTE: no record cap is applied.  The former ``cnt > MAX`` check only
    # executed ``pass`` (its ``break`` was commented out), so it was dropped.
    for idch, seq in seq2feature.parse_fasta(fname):
        header = idch.strip()
        encoded = base64.b64encode(header.replace("\t", ""))
        # Standard Base64 may emit "/", which open() would interpret as a
        # directory separator and fail on; names without "/" are unchanged.
        wfname = encoded.replace("/", "_")
        with open("%s/%s.fasta" % (save_dir, wfname), "w") as fout:
            fout.write(">%s\n" % (header))
            fout.write(seq + "\n")
def split_fasta(fname, save_dir):
    """Write every record of a FASTA file into its own file under save_dir.

    For each ``(idch, seq)`` record yielded by
    ``seq2feature.parse_fasta(fname)`` a file ``<save_dir>/<name>.fasta`` is
    created holding a ``>`` header line with the stripped identifier and
    one line with the sequence.  ``<name>`` is the Base64 encoding of the
    stripped identifier with tab characters removed, with every "/" of the
    Base64 text replaced by "_".

    To recover the identifier from a file name, replace "_" with "/" and
    Base64-decode the result ("_" is not part of the standard Base64
    alphabet, so the mapping is unambiguous).

    Args:
        fname: Path handed unchanged to ``seq2feature.parse_fasta``.
        save_dir: Existing directory that receives the per-record files.

    Returns:
        None.  The function only writes files.
    """
    # NOTE: no record cap is applied.  The former ``cnt > MAX`` check only
    # executed ``pass`` (its ``break`` was commented out), so it was dropped.
    for idch, seq in seq2feature.parse_fasta(fname):
        header = idch.strip()
        encoded = base64.b64encode(header.replace("\t", ""))
        # Standard Base64 may emit "/", which open() would interpret as a
        # directory separator and fail on; names without "/" are unchanged.
        wfname = encoded.replace("/", "_")
        with open("%s/%s.fasta" % (save_dir, wfname), "w") as fout:
            fout.write(">%s\n" % (header))
            fout.write(seq + "\n")
def get_dataset(fname, k):
    """Build parallel label and feature lists from a FASTA file.

    Every ``(prot_id, seq)`` record yielded by ``seq2feature.parse_fasta(fname)``
    contributes its identifier as a label and ``k_spec(seq, k)`` as the
    matching feature entry.

    Args:
        fname: Path handed unchanged to ``seq2feature.parse_fasta``.
        k: Second argument forwarded to ``k_spec`` (presumably the k-mer
            length of a spectrum feature -- confirm against ``k_spec``).

    Returns:
        A ``(labels, features)`` tuple of two equally long lists, in the
        order the records were parsed.
    """
    labels = []
    features = []
    for prot_id, seq in seq2feature.parse_fasta(fname):
        labels.append(prot_id)
        features.append(k_spec(seq, k))
    return labels, features


if __name__ == "__main__":
    # Command-line entry point: score every sequence of a FASTA file with a
    # saved model and print the records whose score reaches the threshold.
    parser = argparse.ArgumentParser(description='Predict X binding proteins.')
    # The script cannot do anything useful without a model and an input
    # file, so a missing one is reported as a usage error up front.
    parser.add_argument('-model', action="store", dest="model", required=True)
    parser.add_argument('-thr', action="store", dest="thr", type=float)
    parser.add_argument('-fname', action="store", dest="fname", required=True)

    # Parse the command line once and read every option from the result.
    args = parser.parse_args()
    fname = args.fname
    thr = args.thr

    labels, features = get_dataset(fname, 2)
    model = svmutil.svm_load_model(args.model)
    # A list of zeros stands in for the labels argument.
    # NOTE(review): presumably libsvm's svmutil, where the labels only feed
    # the accuracy report and the third result holds per-sample decision
    # values -- confirm.
    plbl, pacc, pvals = svmutil.svm_predict(
        [0] * len(features), features, model, "")

    # The file is parsed a second time to get the sequences; record order
    # matches the order of ``features`` and therefore of ``pvals``.
    for cnt, (prot_id, seq) in enumerate(seq2feature.parse_fasta(fname)):
        pval = pvals[cnt][0]
        # With no -thr given every record is printed (this is what the
        # Python 2 comparison against None used to do implicitly).
        if thr is None or pval >= thr:
            # Parenthesised single-argument print: same output on Python 2
            # and valid syntax on Python 3.
            print("> %s:%f" % (prot_id, pval))
            print(seq)