Example #1
0
import numpy as np
import sklearn
import sklearn.cross_validation
import sklearn.ensemble
import sklearn.svm

from epitopes import imma2, features

# Build the feature matrix and labels once at module level; the
# run_classifiers(...) call further down reads X and Y from here.
# A single parenthesised argument prints the same text under the Python 2
# print statement and the Python 3 print function.
print("Loading data and transforming to toxin features")
# NOTE(review): imm / non are presumably the immunogenic and non-immunogenic
# classes -- confirm against epitopes.imma2.
imm, non = imma2.load_classes()
X, Y = features.toxin_features(imm, non, substring_length=2, positional=False)

def run_classifiers(X, Y, c_values=(0.0001, 0.001, 0.01),
                    n_classifiers=2000, cv=10):
  """
  Print the mean cross-validated score of several classifiers on (X, Y).

  One LinearSVC is scored per entry of `c_values`, followed by a single
  RandomForestClassifier. Each reported number is the mean of the per-fold
  scores returned by cross_val_score. Results are written to stdout and
  nothing is returned.

  Parameters (the defaults reproduce the previously hard-coded values):
    X             -- feature matrix; must expose `.shape`.
    Y             -- labels, passed to cross_val_score unchanged.
    c_values      -- iterable of LinearSVC `C` values to try. 0.1, 1 and 10
                     were commented out of the original list.
    n_classifiers -- first positional argument of RandomForestClassifier
                     (the number of trees).
    cv            -- `cv` argument of cross_val_score (number of folds).
  """
  # NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20
  # in favour of sklearn.model_selection; the file-level imports need
  # updating before this can run against a newer scikit-learn.
  def mean_cv_score(model):
    scores = sklearn.cross_validation.cross_val_score(model, X, Y, cv=cv)
    return np.mean(scores)

  # Each print receives one pre-formatted argument, so the output is the same
  # under the Python 2 print statement and the Python 3 print function.
  print("Data shape %s" % (X.shape,))
  for c in c_values:
    svm = sklearn.svm.LinearSVC(C=c)
    print("SVM C = %s" % (c,))
    print(mean_cv_score(svm))

  rf = sklearn.ensemble.RandomForestClassifier(n_classifiers)
  print("Random Forest")
  print(mean_cv_score(rf))

"""
Toxin features alone seem to do terribly
"""
# Score the classifiers on the module-level X / Y built from toxin features.
run_classifiers(X, Y)

"""
Try reducing the dimensionality
"""
import numpy as np
import sklearn
import sklearn.cross_validation
import sklearn.ensemble
import sklearn.svm

from epitopes import features, imma2

# Build the feature matrix and labels once at module level; the
# run_classifiers(...) call further down reads X and Y from here.
# A single parenthesised argument prints the same text under the Python 2
# print statement and the Python 3 print function.
print("Loading data and transforming to toxin features")
# NOTE(review): imm / non are presumably the immunogenic and non-immunogenic
# classes -- confirm against epitopes.imma2.
imm, non = imma2.load_classes()
X, Y = features.toxin_features(imm, non, substring_length=3, positional=True)

def run_classifiers(X, Y, c_values=(0.0001, 0.001, 0.01, 0.1, 1),
                    n_classifiers=1000, cv=10):
  """
  Print the mean cross-validated score of several classifiers on (X, Y).

  One LinearSVC is scored per entry of `c_values`, followed by a single
  RandomForestClassifier. Each reported number is the mean of the per-fold
  scores returned by cross_val_score. Results are written to stdout and
  nothing is returned.

  Parameters (the defaults reproduce the previously hard-coded values):
    X             -- feature matrix; must expose `.shape`.
    Y             -- labels, passed to cross_val_score unchanged.
    c_values      -- iterable of LinearSVC `C` values to try.
    n_classifiers -- first positional argument of RandomForestClassifier
                     (the number of trees).
    cv            -- `cv` argument of cross_val_score (number of folds).
  """
  # NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20
  # in favour of sklearn.model_selection; the file-level imports need
  # updating before this can run against a newer scikit-learn.
  def mean_cv_score(model):
    scores = sklearn.cross_validation.cross_val_score(model, X, Y, cv=cv)
    return np.mean(scores)

  # Each print receives one pre-formatted argument, so the output is the same
  # under the Python 2 print statement and the Python 3 print function.
  print("Data shape %s" % (X.shape,))
  for c in c_values:
    svm = sklearn.svm.LinearSVC(C=c)
    print("SVM C = %s" % (c,))
    print(mean_cv_score(svm))

  rf = sklearn.ensemble.RandomForestClassifier(n_classifiers)
  print("Random Forest")
  print(mean_cv_score(rf))

"""
Toxin features alone seem to do terribly
"""
# Score the classifiers on the module-level X / Y built from toxin features.
run_classifiers(X, Y)