import getopt
import os
import sys
from subprocess import call

from classes.FeatureSet import FeatureSet

# Build a LibSVM model from a positive and a negative feature set.
#
# Parameters:
#
# -p: File name for positive feature set (any file type)
# -n: File name for negative feature set (any file type)
# -o: Name of output LibSVM model
#
# Input files are read from data/; the outputs are models/<-o>.scale and
# models/<-o>.model.

# Intermediate LibSVM-format file handed to svm-train; removed afterwards.
TMP_LIBSVM = 'tmp.libsvm'

usage = 'usage: %s -p <positive file> -n <negative file> -o <model name>' % sys.argv[0]

posPath = negPath = outPath = None
try:
    opts, extraparams = getopt.getopt(sys.argv[1:], 'o:p:n:')
except getopt.GetoptError as err:
    sys.exit('%s\n%s' % (err, usage))
for o, p in opts:
    if o == '-p':
        posPath = p
    elif o == '-n':
        negPath = p
    elif o == '-o':
        outPath = p

# All three options are required; fail with a usage message rather than a
# NameError further down.
if posPath is None or negPath is None or outPath is None:
    sys.exit(usage)

# Aggregate inputs, export to libsvm file
fs = FeatureSet()
fs.load('data/' + posPath, patternClass='real')
fs.add_instances('data/' + negPath, patternClass='pseudo')
fs.weka_smote()
# NOTE(review): presumably writes the scaling parameters to this path so they
# can be re-applied at prediction time -- confirm in FeatureSet.libsvm_scale.
fs.libsvm_scale(paramOut='models/' + outPath + '.scale')
fs.export(TMP_LIBSVM)

# Build model
# Micropred: -c 100 -d 1 -h 1 -e 0.001 -g 0.0019531
# HeteroMir: -c 1 -d 1 -h 1 -e 0.001 -g 0.06
#
# The command is passed as an argument list (no shell), so a model name
# containing spaces or shell metacharacters is handed over verbatim.
try:
    status = call([
        'progs/libsvm-3.14/svm-train',
        '-c', '1', '-d', '1', '-h', '1', '-e', '0.001', '-g', '0.06', '-b', '1',
        TMP_LIBSVM,
        'models/' + outPath + '.model',
    ])
finally:
    # Clean up, even if launching svm-train raised.
    if os.path.exists(TMP_LIBSVM):
        os.remove(TMP_LIBSVM)

# Report a failed training run through the exit status instead of ending
# silently with success.
if status != 0:
    sys.exit('svm-train failed with exit status %d' % status)
# External commands below are passed as argument lists rather than shell
# strings, so paths containing spaces or shell metacharacters reach the child
# process unchanged.

# Hairpin file name derived from the negative set; the steps below read and
# write it under data/.
negHairpins = negPath + '.nr.hairpins'

call(['python', 'extract_hairpins.py', '-i', negPath])

print("### Extracting micropred features from coding regions")
sl = SequenceList()
sl.load_fasta('data/' + negHairpins)
# NOTE(review): presumably keeps a random sample of 10000 sequences -- confirm
# in SequenceList.select_random. The export overwrites the file just loaded.
sl.select_random(10000)
sl.export_fasta('data/' + negHairpins)
call(['python', 'build_micropred_features.py', '-i', negHairpins, '-n', numThreads])
# call('python build_huntmi_features.py -i '+negPath+'.nr.hairpins')

################################################
# Build LibSVM model
################################################
print("### Building LibSVM model")
call([
    'python', 'build_model.py',
    '-p', speciesFilename + '.features',
    '-n', negHairpins + '.micropred',
    '-o', speciesFilename,
])

################################################
# Build feature set from hairpin candidates in genome of interest
################################################
# Hairpin file name derived from the genome under exploration.
inHairpins = inPath + '.nr.hairpins'

print("### Building hairpins from genome under exploration")
call(['python', 'extract_hairpins.py', '-i', inPath])

print("### Extracting micropred features from genome under exploration")
call(['python', 'build_micropred_features.py', '-i', inHairpins, '-n', numThreads])

################################################
# Run svm-predict on all hairpin candidates in genome of interest
################################################
fs = FeatureSet()
fs.load('data/' + inHairpins + '.micropred', patternClass='real')
# Scale with the parameter file stored next to the model built above.
fs.libsvm_scale(params='models/' + speciesFilename + '.scale')
fs.export('data/' + inHairpins + '.libsvm')
call([
    'progs/libsvm-3.14/svm-predict', '-b', '1',
    'data/' + inHairpins + '.libsvm',
    'models/' + speciesFilename + '.model',
    'data/' + inHairpins + '.results',
])
import sys
import getopt
from subprocess import call

from classes.FeatureSet import FeatureSet
from classes.ResultSet import ResultSet

# Run svm-predict on a feature file using a previously built model.
#
# Parameters:
#
# -m: model name (<-m>.scale and <-m>.model should exist in models directory)
# -i: File containing input feature data
#
# The input is read from data/<-i>; predictions are written to
# data/<-i>.results.

usage = 'usage: %s -m <model name> -i <input feature file>' % sys.argv[0]

modelName = inPath = None
try:
    opts, extraparams = getopt.getopt(sys.argv[1:], 'm:i:')
except getopt.GetoptError as err:
    sys.exit('%s\n%s' % (err, usage))
for o, p in opts:
    if o == '-m':
        modelName = p
    elif o == '-i':
        inPath = p

# Both options are required; fail with a usage message rather than a
# NameError further down.
if modelName is None or inPath is None:
    sys.exit(usage)

# The numbered prints are progress markers emitted between the steps.
print('1')
fs = FeatureSet()
print('2')
fs.load('data/' + inPath, patternClass='real')
print('3')
fs.libsvm_scale(params='models/' + modelName + '.scale')
print('4')
fs.export('tmp.libsvm')

# Passed as an argument list (no shell), so names containing spaces or shell
# metacharacters are handed to svm-predict verbatim.
status = call([
    'progs/libsvm-3.14/svm-predict', '-b', '1',
    'tmp.libsvm',
    'models/' + modelName + '.model',
    'data/' + inPath + '.results',
])

# Cleanup is currently disabled, so tmp.libsvm is left on disk:
# call('rm tmp.libsvm', shell=True)

# Report a failed prediction run through the exit status instead of ending
# silently with success.
if status != 0:
    sys.exit('svm-predict failed with exit status %d' % status)
import getopt
import sys

from classes.FeatureSet import FeatureSet

# Load a feature file and export it again under a different extension.
#
# Parameters:
#
# -i: Input file; its format is the text after the last '.' in the name
# -f: Output format; replaces the input's extension to form the output path

# Input extensions this script accepts.
SUPPORTED_INPUT_FORMATS = ['micropred', 'features', 'huntmi', 'csv', 'svm', 'libsvm', 'arff']

usage = 'usage: %s -i <input file> -f <output format>' % sys.argv[0]

inPath = outFormat = None
try:
    opts, extraparams = getopt.getopt(sys.argv[1:], 'i:f:')
except getopt.GetoptError as err:
    sys.exit('%s\n%s' % (err, usage))
for o, p in opts:
    if o == '-i':
        inPath = p
    elif o == '-f':
        outFormat = p

# Both options are required; fail with a usage message rather than a
# NameError further down.
if inPath is None or outFormat is None:
    sys.exit(usage)

# Split off the extension: everything up to and including the last '.' is
# kept as the stem, so 'a.b.csv' with -f arff becomes 'a.b.arff'.
parts = inPath.split('.')
inFormat = parts[-1]
noFormatName = ''.join(part + '.' for part in parts[:-1])
outPath = noFormatName + outFormat

# Refuse unknown input formats loudly instead of exiting successfully
# without having converted anything.
if inFormat not in SUPPORTED_INPUT_FORMATS:
    sys.exit("unsupported input format '%s' (expected one of: %s)"
             % (inFormat, ', '.join(SUPPORTED_INPUT_FORMATS)))

fs = FeatureSet()
fs.load(inPath)
fs.export(outPath)