Ejemplo n.º 1
0
 def run(self, **kwargs):
     """Build the dataset, cross-validate an SVM on it if selected, and optionally save.

     All keyword arguments are stored on ``self.run_parameters`` and forwarded
     unchanged to ``self.database.get_pyml_dataset``. These keys are also read
     directly, so a missing one raises ``KeyError``:

     * ``gamma`` -- gamma value of the gaussian kernel attached to the dataset.
     * ``folds`` -- read only when ``self.classifier`` is ``Classifier.SVM``;
       passed to ``self.database.get_cv_folds`` and as ``numFolds`` to
       ``cvFromFolds``.
     * ``save`` -- when truthy, the private ``__save_results`` method is called.
     """
     self.run_parameters = kwargs

     # The kernel always uses cosine normalization; only gamma is configurable.
     dataset = self.database.get_pyml_dataset(self.features, **kwargs)
     dataset.attachKernel('gaussian', gamma=kwargs['gamma'], normalization='cosine')

     if self.classifier == Classifier.SVM:
         classifier = SVM(optimizer='pegasos')
         train_folds, test_folds = self.database.get_cv_folds(kwargs['folds'])
         self.pyml_result = cvFromFolds(
             classifier,
             dataset,
             train_folds,
             test_folds,
             numFolds=kwargs['folds'],
             verbose=False
         )
         self.get_rfpp()

     if kwargs['save']:
         self.__save_results()
Ejemplo n.º 2
0
def learn(classified, histograms):
    """Train an SVM on per-image histogram bin values and return it.

    One sample row is built per image: the values of bins ``0..NUM_BINS-1`` of
    that image's histogram, read with ``cv.QueryHistValue_1D``. The sample's
    label is the key of ``classified`` under which the image is listed.

    :param classified: mapping from a class label to an iterable of image keys
    :param histograms: mapping from an image key to its histogram
        (presumably a 1-D OpenCV histogram with at least ``NUM_BINS`` bins --
        confirm against the caller)
    :return: the ``SVM`` instance after training on the assembled data set
    """
    samples = []
    labels = []
    for label, images in classified.items():
        for image in images:
            hist = histograms[image]
            samples.append(
                [cv.QueryHistValue_1D(hist, j) for j in range(NUM_BINS)])
            labels.append(label)

    data = VectorDataSet(samples, L=labels)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(str(data))

    clf = SVM()
    clf.train(data)
    return clf
Ejemplo n.º 3
0
from PyML import SparseDataSet, SVM

__author__ = 'basir'

# Load the sparse-format heart data set; labels come from column -1
# (presumably the last column -- see the PyML SparseDataSet documentation).
data = SparseDataSet('data/heartSparse.data', labelsColumn=-1)
svm = SVM()

# Cross-validate the SVM on the data set (5 is presumably the fold count).
res = svm.cv(data, 5)

# Single-argument print(...) gives the same output on Python 2 and 3.
for fold in res:
    print(fold)
print(res)
# print data
# help(sequenceData.spectrum_data)
Ejemplo n.º 4
0
def init():
    """Create and return the SVM classifier with the project's chosen options.

    The classifier is built with ``C=10`` and ``optimization='liblinear'``.
    """
    options = {'C': 10, 'optimization': 'liblinear'}
    return SVM(**options)
Ejemplo n.º 5
0
def _remove_quietly(paths):
    """Delete every file in *paths*, ignoring any that cannot be removed."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup: the file may never have been created.
            pass


def _read_positive_svm_scores(results_path):
    """Return the scores of non-negative-labelled rows, ordered by entry id.

    Each usable line of the tab-separated file must carry an integer entry id
    in column 0, a float score in column 1 and an integer label in column 3
    (presumably the *given* label written by PyML's ``Results.toFile`` --
    confirm against the PyML version in use). Lines that do not parse that
    way, such as headers or comments, are skipped.
    """
    scored_entries = []
    with open(results_path, "r") as infile:
        for line in infile:
            fields = line.rstrip("\r\n").split("\t")
            try:
                entry = int(fields[0])
                score = float(fields[1])
                label = int(fields[3])
            except (ValueError, IndexError):
                continue
            # Compare against the integer -1: the label was already converted
            # with int(), so comparing it to the string "-1" never matched.
            if label != -1:
                scored_entries.append((entry, score))

    scored_entries.sort(key=lambda pair: pair[0])
    return [score for _, score in scored_entries]


def _score_peptides(peptides, job_id, input_train):
    """Train an SVM on *input_train* and return one score per peptide."""
    input_path = os.path.join(TMP_PATH, "%s_svm.fasta" % job_id)
    model_path = os.path.join(TMP_PATH, "%s_svm_model.txt" % job_id)
    output_path = os.path.join(TMP_PATH, "%s_svm.txt" % job_id)

    try:
        # Write every peptide twice, first with label 1 and then with
        # label -1; only the rows labelled 1 are kept when the results
        # are read back.
        with open(input_path, "w") as output_tmp:
            count = 0
            for label in (1, -1):
                for peptide in peptides:
                    count += 1
                    output_tmp.write(
                        "> %i label=%s\n%s\n" % (count, label, peptide))

        # train data
        train_data = SequenceData(os.path.join(PATH, input_train),
                                  mink=1,
                                  maxk=1,
                                  maxShift=0,
                                  headerHandler=svm_process_header)
        train_data.attachKernel('cosine')

        trained_svm = SVM(C=1)
        trained_svm.train(train_data)
        trained_svm.save(model_path)

        # The model is saved and reloaded before testing, as in the
        # original flow.
        loaded_svm = loadSVM(model_path, train_data)

        # test data
        test_data = SequenceData(input_path,
                                 mink=1,
                                 maxk=1,
                                 maxShift=0,
                                 headerHandler=svm_process_header)
        test_data.attachKernel('cosine')
        results = loaded_svm.test(test_data)
        results.toFile(output_path)

        return _read_positive_svm_scores(output_path)
    finally:
        # Remove the temporary input, model and result files even on failure.
        _remove_quietly((input_path, model_path, output_path))


def svm_prediction(peptides, job_id, input_train="SVM_POS_NEG.fasta"):
    """
    Makes a final prediction based on SVM training files.
    This code is used for prediction of blind datasets, based on the training
    datasets of positives and negatives.

    The training file is read from the module-level ``PATH`` directory;
    temporary input, model and result files are written to the module-level
    ``TMP_PATH`` directory and removed afterwards. While the SVM runs,
    ``sys.stdout`` and ``sys.stderr`` are redirected to the null device; the
    streams that were installed on entry are always restored, even when an
    exception is raised.

    :param peptides: input peptides (a sized sequence of strings)
    :param job_id: random job id assigned prior to start predicting; used to
        name the temporary files
    :param input_train: input positive and negative examples used in training
    :return: list with one SVM score per input peptide, in input order
        (an empty list when no peptides are given)
    """

    print("Begin SVM")

    svm_scores = []

    # suppress SVM output
    saved_stdout, saved_stderr = sys.stdout, sys.stderr
    devnull = open(os.devnull, 'w')
    sys.stdout, sys.stderr = devnull, devnull
    try:
        # Nothing to score for an empty request.
        if len(peptides) > 0:
            svm_scores = _score_peptides(peptides, job_id, input_train)
    finally:
        # restore the caller's output streams and release the null device
        sys.stdout, sys.stderr = saved_stdout, saved_stderr
        devnull.close()

    print("End SVM")
    return svm_scores
Ejemplo n.º 6
0
 def __init__(self):
     """Create the feature extractors and the SVM instance used by this object.

     The three collaborators are built in a fixed order: ``SVMFeatures``,
     then ``SVMTDFeatures``, then ``SVM``.
     """
     # Feature extractor instances.
     self.Features = SVMFeatures()
     self.TDFeatures = SVMTDFeatures()

     # Classifier instance, built with default arguments.
     self.svminstance = SVM()