def run(self, **kwargs):
    """Cross-validate the configured classifier and store the result.

    All keyword arguments are kept in ``self.run_parameters`` and forwarded
    to ``self.database.get_pyml_dataset``. The following keys are also read
    directly by this method:

    :param gamma: passed as ``gamma`` to the 'gaussian' kernel attached to
        the data set
    :param folds: passed to ``self.database.get_cv_folds`` and as
        ``numFolds`` to ``cvFromFolds``
    :param save: when truthy, ``self.__save_results()`` is called
    :raises KeyError: if a key that is read here is missing from ``kwargs``
    """
    # NOTE(review): the original indentation of this method was lost; the
    # nesting below (cross-validation inside the SVM branch, saving outside)
    # is an assumption -- confirm against the original file.
    self.run_parameters = kwargs
    data = self.database.get_pyml_dataset(self.features, **kwargs)
    # Gaussian kernel with cosine normalization on the data set.
    data.attachKernel('gaussian', gamma=kwargs['gamma'], normalization='cosine')
    # normalization='cosine'
    if self.classifier == Classifier.SVM:
        # svm = SVM()
        svm = SVM(optimizer='pegasos')
        # Folds come from the database rather than being drawn here.
        training, testing = self.database.get_cv_folds(kwargs['folds'])
        self.pyml_result = cvFromFolds(svm, data, training, testing, numFolds=kwargs['folds'], verbose=False)
        # presumably derives metrics from self.pyml_result -- defined outside this view
        self.get_rfpp()
    if kwargs['save']:
        self.__save_results()
def learn(classified, histograms):
    """Train an SVM on per-image histogram features.

    Each image contributes one sample row holding its ``NUM_BINS`` histogram
    bin values; the row's label is the class key the image is listed under.

    :param classified: mapping from class label to an iterable of image keys
    :param histograms: mapping from image key to a 1-D histogram that can be
        read with ``cv.QueryHistValue_1D``
    :return: the trained ``SVM`` instance
    """
    clf = SVM()

    samples = []
    labels = []
    for label, images in classified.items():
        for image in images:
            hist = histograms[image]
            samples.append(
                [cv.QueryHistValue_1D(hist, j) for j in range(NUM_BINS)])
            labels.append(label)

    data = VectorDataSet(samples, L=labels)
    # Parenthesized so the line is valid on both Python 2 and Python 3.
    print(str(data))
    clf.train(data)
    return clf
# Demo script: cross-validate a default SVM on the sparse heart data set.
# NOTE(review): uses Python 2 print statements; the original line breaks were
# lost, so `print res` being outside the loop is an assumption -- confirm.
from PyML import SparseDataSet, SVM

__author__ = 'basir'

# labelsColumn=-1: presumably the labels are in the last column -- TODO confirm.
data = SparseDataSet('data/heartSparse.data', labelsColumn=-1)
svm = SVM()
# Second argument is presumably the number of folds -- TODO confirm.
res = svm.cv(data, 5)
# Print each element of the result, then the result object itself.
for fold in res:
    print fold
print res
# print data
# help(sequenceData.spectrum_data)
def init():
    '''Build and return the classifier configured with the chosen options.'''
    # NOTE(review): if SVM is PyML's class, its documented keyword is
    # `optimizer`, not `optimization` -- confirm this option is honoured.
    options = {'C': 10, 'optimization': 'liblinear'}
    return SVM(**options)
def _parse_positive_scores(lines):
    """Return the scores of all non-negative-label rows, ordered by entry id.

    Each usable row is tab separated with the entry id in column 0, the score
    in column 1 and the label in column 3. Rows that do not fit (header,
    comment or truncated lines) are skipped.
    """
    scored = []
    for line in lines:
        fields = line.rstrip("\r\n").split("\t")
        try:
            entry = int(fields[0])
            score = float(fields[1])
            label = int(fields[3])
        except (ValueError, IndexError):
            continue
        # Compare as integers: the label was converted with int() above, so
        # comparing against the string "-1" would never filter anything.
        if label != -1:
            scored.append((entry, score))
    scored.sort(key=lambda pair: pair[0])
    return [score for _, score in scored]


def _remove_quietly(paths):
    """Best-effort removal of temporary files; each path is tried on its own."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            # Missing or locked temp files must not fail the prediction.
            pass


def svm_prediction(peptides, job_id, input_train="SVM_POS_NEG.fasta"):
    """
    Makes a final prediction based on SVM training files.

    This code is used for prediction of blind datasets, based on the training
    datasets of positives and negatives. Every peptide is written to a
    temporary FASTA file twice (once with label 1, once with label -1); only
    the scores of the rows whose label is not -1 are returned.

    Standard output and standard error are silenced while the SVM runs and
    are restored to whatever they were on entry, even if an error occurs.
    Temporary files in ``TMP_PATH`` are removed on success and on failure.

    :param peptides: input peptides (a sized, re-iterable sequence)
    :param job_id: random job id assigned prior to start predicting; used to
        name the temporary files
    :param input_train: input positive and negative examples used in
        training, relative to ``PATH``
    :return: list of SVM scores ordered by entry number (presumably one per
        input peptide, in input order -- depends on the ids PyML writes out);
        an empty list when ``peptides`` is empty
    """
    print("Begin SVM")

    # Suppress SVM output, remembering the streams that were active so that a
    # caller's own redirection is not clobbered on the way out.
    saved_stdout, saved_stderr = sys.stdout, sys.stderr
    devnull = open(os.devnull, 'w')
    sys.stdout, sys.stderr = devnull, devnull
    try:
        # No scores are pre-loaded (the database lookup is disabled), so the
        # lengths only match for empty input, which skips the SVM entirely.
        svm_scores = []
        if len(peptides) != len(svm_scores):
            input_svm = os.path.join(TMP_PATH, "%s_svm.fasta" % job_id)
            model_svm = os.path.join(TMP_PATH, "%s_svm_model.txt" % job_id)
            output_svm = os.path.join(TMP_PATH, "%s_svm.txt" % job_id)
            try:
                # Generate the SVM input: all peptides as label 1, then all
                # peptides again as label -1, numbered consecutively.
                with open(input_svm, "w") as output_tmp:
                    count = 0
                    for label in (1, -1):
                        for peptide in peptides:
                            count += 1
                            output_tmp.write(
                                "> %i label=%s\n%s\n" % (count, label, peptide))

                # Train on the reference data and persist the model.
                train_data = SequenceData(os.path.join(PATH, input_train),
                                          mink=1, maxk=1, maxShift=0,
                                          headerHandler=svm_process_header)
                train_data.attachKernel('cosine')
                s = SVM(C=1)
                s.train(train_data)
                s.save(model_svm)

                # Reload the trained SVM and score the generated test data.
                loaded_svm = loadSVM(model_svm, train_data)
                test_data = SequenceData(input_svm,
                                         mink=1, maxk=1, maxShift=0,
                                         headerHandler=svm_process_header)
                test_data.attachKernel('cosine')
                results = loaded_svm.test(test_data)
                results.toFile(output_svm)

                # Read the results back (positives + negatives) and keep the
                # scores of the positive copies only.
                with open(output_svm, "r") as infile:
                    svm_scores = _parse_positive_scores(infile)
            finally:
                # Remove the temporary input, model and result files.
                _remove_quietly((input_svm, model_svm, output_svm))
    finally:
        # Restore normal output.
        sys.stdout, sys.stderr = saved_stdout, saved_stderr
        devnull.close()

    print("End SVM")
    return svm_scores
def __init__(self):
    """Create and store the two feature helpers and the SVM instance."""
    # All three are default-constructed; SVMFeatures and SVMTDFeatures are
    # defined outside this view.
    self.Features = SVMFeatures()
    self.TDFeatures = SVMTDFeatures()
    self.svminstance = SVM()