def computePOIM(x, y, poim_degree, kernel_degree, savepath):
    """Train an SVM on string data, compute its POIMs and pickle them.

    Args:
        x: Indexable collection of sequences; passed to
            ``StringCharFeatures`` with the ``DNA`` alphabet.
        y: Labels, passed unchanged to ``BinaryLabels``.
        poim_degree: Degree forwarded to ``poim.compute_poims``.
        kernel_degree: Degree forwarded to
            ``WeightedDegreePositionStringKernel``.
        savepath: Path of the file the result is pickled to (opened in
            binary write mode, so an existing file is overwritten).

    Returns:
        Whatever ``poim.compute_poims`` returns; the same object that is
        written to ``savepath``.
    """
    feats_train = StringCharFeatures(x, DNA)
    labels = BinaryLabels(y)
    kernel = WeightedDegreePositionStringKernel(
        feats_train, feats_train, kernel_degree
    )

    # The regularisation constant is fixed at 1 for this helper.
    C = 1
    svm = LibSVM(C, kernel, labels)
    svm.train()

    # Length of the first sequence only.
    # NOTE(review): presumably all sequences share this length -- confirm.
    seq_len = len(x[0])
    ma = poim.compute_poims(svm, kernel, poim_degree, seq_len)

    # The context manager closes the file even if pickling fails.
    with open(savepath, 'wb') as fobj:
        pickle.dump(ma, fobj)
    return ma
def computePOIM(x, y, poim_degree, kernel_degree, savepath):
    """Train an SVM on string data, compute its POIMs and pickle them.

    Progress messages are printed to standard output before the kernel
    is built, before training and before the POIM computation.

    Args:
        x: Indexable collection of sequences; passed to
            ``StringCharFeatures`` with the ``DNA`` alphabet.
        y: Labels; converted with ``np.array`` before being handed to
            ``BinaryLabels``.
        poim_degree: Degree forwarded to ``poim.compute_poims``.
        kernel_degree: Degree forwarded to
            ``WeightedDegreePositionStringKernel``.
        savepath: Path of the file the result is pickled to (opened in
            binary write mode, so an existing file is overwritten).

    Returns:
        Whatever ``poim.compute_poims`` returns; the same object that is
        written to ``savepath``.
    """
    feats_train = StringCharFeatures(x, DNA)
    labels = BinaryLabels(np.array(y))

    # Single-argument parenthesised print behaves the same on Python 2.
    print("compute kernel matrix")
    kernel = WeightedDegreePositionStringKernel(
        feats_train, feats_train, kernel_degree
    )

    # The regularisation constant is fixed at 1 for this helper.
    C = 1
    svm = LibSVM(C, kernel, labels)
    print("train support vector machine")
    svm.train()

    # Length of the first sequence only.
    # NOTE(review): presumably all sequences share this length -- confirm.
    seq_len = len(x[0])
    print("compute poim")
    ma = poim.compute_poims(svm, kernel, poim_degree, seq_len)

    # The context manager closes the file even if pickling fails.
    with open(savepath, "wb") as fobj:
        pickle.dump(ma, fobj)
    return ma
def svm_poim(argv):
    """Command-line entry point: train an SVM and plot its POIMs.

    Expected layout of ``argv``::

        prog poim C poimdegree <kernel params> <input files> poim.png
        [dna|protein] <converter>

    ``argv[1]`` must be ``'poim'``. ``argv[2]`` is read as a float (C) and
    ``argv[3]`` as an int (POIM degree). The remaining items are consumed
    by ``parse.parse_kernel_param`` and ``parse.parse_input_file_train``;
    exactly three items must be left afterwards: the output file name,
    the sequence type and the converter.

    On any argument error a message is written to standard error and the
    process exits with status -1.
    """
    assert argv[1] == 'poim'

    if len(argv) < 7:
        sys.stderr.write("usage: %s poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles poim.png [dna|protein] non(nucleotide|amino)converter\n" % argv[0])
        sys.exit(-1)

    # parse input parameters
    C = float(argv[2])
    poimdegree = int(argv[3])
    kernelname, kparam, argv_rest = parse.parse_kernel_param(argv[4:], False)
    examples, labels, argv_rest = parse.parse_input_file_train(kernelname, argv_rest)

    # Pick the single applicable complaint (if any), then report it once.
    complaint = None
    if len(argv_rest) < 1:
        complaint = "poim.png [dna|protein] non(nucleotide|amino)converter are missing\n"
    elif len(argv_rest) < 2:
        complaint = "[dna|protein] non(nucleotide|amino)converter are missing\n"
    elif len(argv_rest) < 3:
        # Only the converter is absent, so the last item is the sequence type.
        seq_kind = argv_rest[-1]
        if seq_kind == 'dna':
            complaint = "non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n"
        elif seq_kind == 'protein':
            complaint = "non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n"
        else:
            complaint = "Here expect FASTA sequence type as [dna|protein] instead of -" + seq_kind + "- Cannot continue.\n"
    elif len(argv_rest) > 3:
        complaint = "Too many arguments\n"

    if complaint is not None:
        sys.stderr.write(complaint)
        sys.exit(-1)

    poimfilename, seq_source, nuc_con = argv_rest[0], argv_rest[1], argv_rest[2]

    utils.check_params(kparam, C, len(examples[0]))

    # train svm and compute POIMs
    svm, kernel, feats_train, preproc = train(
        examples, labels, C, kernelname, kparam, seq_source, nuc_con
    )
    print("done with training ")

    seq_len = len(examples[0])
    poim_values, max_poim, diff_poim, poim_totalmass = compute_poims(
        svm, kernel, poimdegree, seq_len
    )

    # plot poims
    plots.plot_poims(
        poimfilename, poim_values, max_poim, diff_poim, poim_totalmass,
        poimdegree, seq_len
    )
def svm_poim(argv):
    """Command-line entry point: train an SVM and plot its POIMs.

    Expected layout of ``argv``::

        prog poim C poimdegree <kernel params> <input files> poim.png
        [dna|protein] <converter>

    ``argv[1]`` must be ``'poim'``. ``argv[2]`` is read as a float (C) and
    ``argv[3]`` as an int (POIM degree). The remaining items are consumed
    by ``parse.parse_kernel_param`` and ``parse.parse_input_file_train``;
    exactly three items must be left afterwards: the output file name,
    the sequence type and the converter.

    On any argument error a message is written to standard error and the
    process exits with status -1.
    """
    assert argv[1] == 'poim'

    if len(argv) < 7:
        sys.stderr.write("usage: %s poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles poim.png [dna|protein] non(nucleotide|amino)converter\n" % argv[0])
        sys.exit(-1)

    # parse input parameters
    C = float(argv[2])
    poimdegree = int(argv[3])
    kernelname, kparam, argv_rest = parse.parse_kernel_param(argv[4:], False)
    examples, labels, argv_rest = parse.parse_input_file_train(kernelname, argv_rest)

    # Pick the single applicable complaint (if any), then report it once.
    complaint = None
    if len(argv_rest) < 1:
        complaint = "poim.png [dna|protein] non(nucleotide|amino)converter are missing\n"
    elif len(argv_rest) < 2:
        complaint = "[dna|protein] non(nucleotide|amino)converter are missing\n"
    elif len(argv_rest) < 3:
        # Only the converter is absent, so the last item is the sequence type.
        seq_kind = argv_rest[-1]
        if seq_kind == 'dna':
            complaint = "non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n"
        elif seq_kind == 'protein':
            complaint = "non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n"
        else:
            complaint = "Here expect FASTA sequence type as [dna|protein] instead of -" + seq_kind + "- Cannot continue.\n"
    elif len(argv_rest) > 3:
        complaint = "Too many arguments\n"

    if complaint is not None:
        sys.stderr.write(complaint)
        sys.exit(-1)

    poimfilename, seq_source, nuc_con = argv_rest[0], argv_rest[1], argv_rest[2]

    utils.check_params(kparam, C, len(examples[0]))

    # train svm and compute POIMs
    svm, kernel, feats_train, preproc = train(
        examples, labels, C, kernelname, kparam, seq_source, nuc_con
    )

    seq_len = len(examples[0])
    poim_values, max_poim, diff_poim, poim_totalmass = compute_poims(
        svm, kernel, poimdegree, seq_len
    )

    # plot poims
    plots.plot_poims(
        poimfilename, poim_values, max_poim, diff_poim, poim_totalmass,
        poimdegree, seq_len
    )