import eval_dataset """ Instead of dropping or keeping the noisy labels, started trying to just the majority vote. This is saner and became the default """ print print "---" print "Human MHC1" X_human_mhc1_filter, Y_human_mhc1_filter = iedb.load_tcell_ngrams( noisy_labels = 'majority', human = True, mhc_class = 1) eval_dataset.eval_cv(X_human_mhc1_filter, Y_human_mhc1_filter) print print "---" print "No HLA-A2" X_no_hla_a2, Y_no_hla_a2 = iedb.load_tcell_ngrams( noisy_labels = 'majority', human = True, mhc_class = 1, exclude_hla_type = 'HLA-A2$|A-\*02') eval_dataset.eval_cv(X_no_hla_a2, Y_no_hla_a2) print
eval_dataset.eval_split(x,y,x_test,y_test) ASSAY = 'cytotoxicity' print print "---" print "aromatic unigram" X, Y, f = iedb.load_tcell_ngrams( noisy_labels = 'majority', assay_group = ASSAY, subsample_bigger_class = True, human = True, mhc_class = 1, max_ngram = 1, reduced_alphabet= reduced_alphabet.aromatic2, return_transformer = True) eval_dataset.eval_cv(X, Y) print "Tumor-specific antigens" run(X,Y,f) print print "---" print "aromatic bigram" X, Y, f = iedb.load_tcell_ngrams( noisy_labels = 'majority', assay_group = ASSAY, subsample_bigger_class = True, human = True, mhc_class = 1, max_ngram = 2, reduced_alphabet= reduced_alphabet.aromatic2, return_transformer = True) eval_dataset.eval_cv(X, Y)
from epitopes import iedb, amino_acid, features, reduced_alphabet import eval_dataset """ Do results from a restrict HLA sample (only A2) generalize to all the other HLA types? (repeated for AA bigrams) """ A2 = "A2$|A\*02" print print "---" print "Human MHC1 (keep)" X_human_mhc1, Y_human_mhc1 = iedb.load_tcell_ngrams(noisy_labels="keep", human=True, max_ngram=2, mhc_class=1) eval_dataset.eval_cv(X_human_mhc1, Y_human_mhc1) print print "---" print "Human MHC1 (drop)" X_human_mhc1_filter, Y_human_mhc1_filter = iedb.load_tcell_ngrams( noisy_labels="drop", human=True, max_ngram=2, mhc_class=1 ) eval_dataset.eval_cv(X_human_mhc1_filter, Y_human_mhc1_filter) print print "---" print "Human MHC1 noisy = positive" X_human_mhc1_positive, Y_human_mhc1_positive = iedb.load_tcell_ngrams(