import eval_dataset

"""
Instead of dropping or keeping the noisy labels, started
trying to just use the majority vote. This is saner and became the default.
"""


# Load the human (human=True), MHC class 1 (mhc_class=1) n-gram data with
# noisy_labels='majority' and hand the resulting X/Y pair to
# eval_dataset.eval_cv.
# NOTE(review): `iedb` is imported further down in the visible part of this
# file, not above this call -- confirm it is already in scope here.
print("")
print("---")
print("Human MHC1")
X_human_mhc1_filter, Y_human_mhc1_filter = iedb.load_tcell_ngrams(
    noisy_labels='majority', human=True, mhc_class=1)
eval_dataset.eval_cv(X_human_mhc1_filter, Y_human_mhc1_filter)



# Human, MHC class 1 load with noisy_labels='majority', additionally passing
# an `exclude_hla_type` regular expression; the X/Y pair is handed to
# eval_dataset.eval_cv.
# NOTE(review): this pattern spells the second alternative `A-\*02`, while the
# `A2` constant further down in this file uses `A\*02` -- confirm which one
# is intended.
print("")
print("---")
print("No HLA-A2")
X_no_hla_a2, Y_no_hla_a2 = iedb.load_tcell_ngrams(
    noisy_labels='majority',
    human=True,
    mhc_class=1,
    # Raw string: `\*` is meant as a regex escape for a literal asterisk and
    # should not depend on Python passing an unrecognized string escape through.
    exclude_hla_type=r'HLA-A2$|A-\*02')
eval_dataset.eval_cv(X_no_hla_a2, Y_no_hla_a2)


print("")
# NOTE(review): this call used to be indented under the bare `print` above,
# which is an IndentationError at module level and prevents the whole file
# from being parsed; it is dedented here so the script is syntactically valid.
# `x`, `y`, `x_test` and `y_test` are not assigned in the visible part of the
# script -- confirm they are defined before this point.
eval_dataset.eval_split(x, y, x_test, y_test)

# Value passed as `assay_group` to the aromatic n-gram loads in this script.
ASSAY = "cytotoxicity"

# Unigram run (max_ngram=1) over the reduced alphabet
# `reduced_alphabet.aromatic2`, limited to the ASSAY assay group with
# subsample_bigger_class=True. Because return_transformer=True the loader
# yields a third value, bound to `f`.
print("")
print("---")
print("aromatic unigram")
X, Y, f = iedb.load_tcell_ngrams(
    noisy_labels='majority',
    assay_group=ASSAY,
    subsample_bigger_class=True,
    human=True,
    mhc_class=1,
    max_ngram=1,
    reduced_alphabet=reduced_alphabet.aromatic2,
    return_transformer=True)

eval_dataset.eval_cv(X, Y)
print("Tumor-specific antigens")
# NOTE(review): `run` is not defined in the visible part of this file --
# presumably provided elsewhere; confirm before relying on this call.
run(X, Y, f)

# Bigram run (max_ngram=2) with the same loader settings as the unigram case:
# reduced alphabet `reduced_alphabet.aromatic2`, ASSAY assay group,
# subsample_bigger_class=True. X, Y and f are rebound to this run's results.
print("")
print("---")
print("aromatic bigram")
X, Y, f = iedb.load_tcell_ngrams(
    noisy_labels='majority',
    assay_group=ASSAY,
    subsample_bigger_class=True,
    human=True,
    mhc_class=1,
    max_ngram=2,
    reduced_alphabet=reduced_alphabet.aromatic2,
    return_transformer=True)

eval_dataset.eval_cv(X, Y)
from epitopes import iedb, amino_acid, features, reduced_alphabet

import eval_dataset

"""
Do results from a restricted HLA sample (only A2) generalize to all the other HLA types?

(repeated for AA bigrams)
"""
# Regular expression for the A2 HLA type: either `A2` at the end of the
# string or a literal `A*02`. Written as a raw string so the `\*` regex escape
# does not depend on Python passing an unrecognized string escape through.
A2 = r"A2$|A\*02"

# Human, MHC class 1 bigram load (max_ngram=2) with noisy_labels="keep";
# the X/Y pair is handed to eval_dataset.eval_cv.
print("")
print("---")
print("Human MHC1 (keep)")
X_human_mhc1, Y_human_mhc1 = iedb.load_tcell_ngrams(
    noisy_labels="keep",
    human=True,
    max_ngram=2,
    mhc_class=1)
eval_dataset.eval_cv(X_human_mhc1, Y_human_mhc1)


# Human, MHC class 1 bigram load (max_ngram=2) with noisy_labels="drop";
# the X/Y pair is handed to eval_dataset.eval_cv.
print("")
print("---")
print("Human MHC1 (drop)")
X_human_mhc1_filter, Y_human_mhc1_filter = iedb.load_tcell_ngrams(
    noisy_labels="drop",
    human=True,
    max_ngram=2,
    mhc_class=1)
eval_dataset.eval_cv(X_human_mhc1_filter, Y_human_mhc1_filter)


print
print "---"
print "Human MHC1 noisy = positive"
X_human_mhc1_positive, Y_human_mhc1_positive = iedb.load_tcell_ngrams(