def compute_train_kernel(self, g, m, t=20, approx=True, I=100, delta=0.025, skip_variance=False):
    """Build a FastSK kernel and run it over the training sequences only.

    Every argument is forwarded unchanged to the ``FastSK`` constructor
    (``I`` is passed as ``max_iters``). The kernel object is neither stored
    on ``self`` nor returned, so this method is only useful for its side
    effects (presumably timing the computation -- confirm with callers).

    Returns:
        None
    """
    settings = dict(
        g=g,
        m=m,
        t=t,
        approx=approx,
        max_iters=I,
        delta=delta,
        skip_variance=skip_variance,
    )
    FastSK(**settings).compute_train(self.train_seq)
def train_and_test(self, g, m, t, approx, I=100, delta=0.025, skip_variance=False, C=1):
    """Compute the FastSK kernel, fit a calibrated linear SVM and evaluate it.

    The kernel is computed over ``self.train_seq`` / ``self.test_seq``; the
    resulting matrices are stored in ``self.Xtrain`` / ``self.Xtest``, the
    kernel's standard deviations in ``self.stdevs`` and the fitted
    classifier in ``self.clf``.

    Args:
        g, m, t, approx, delta, skip_variance: forwarded unchanged to the
            ``FastSK`` constructor.
        I: forwarded to ``FastSK`` as ``max_iters``.
        C: regularisation parameter handed to ``LinearSVC``.

    Returns:
        The ``(acc, auc)`` pair produced by ``self.evaluate_clf()``.
    """
    fastsk_options = dict(
        g=g,
        m=m,
        t=t,
        approx=approx,
        max_iters=I,
        delta=delta,
        skip_variance=skip_variance,
    )
    fastsk = FastSK(**fastsk_options)
    fastsk.compute_kernel(self.train_seq, self.test_seq)

    # Keep the kernel outputs on the instance; evaluate_clf() is called
    # without arguments, so it presumably reads them from here.
    self.Xtrain = fastsk.get_train_kernel()
    self.Xtest = fastsk.get_test_kernel()
    self.stdevs = fastsk.get_stdevs()

    # Wrap the SVM in a 5-fold calibrator before fitting on the train kernel.
    base_svm = LinearSVC(C=C, class_weight="balanced")
    calibrated = CalibratedClassifierCV(base_svm, cv=5)
    self.clf = calibrated.fit(self.Xtrain, self.Ytrain)

    accuracy, auc_score = self.evaluate_clf()
    return accuracy, auc_score
def main(args):
    """Train a calibrated linear SVM on a FastSK kernel and check its AUC.

    Args:
        args: object exposing ``train`` and ``test`` attributes, each passed
            to ``FastaUtility.read_data`` to load sequences and labels.

    Raises:
        AssertionError: if the test AUC is not at least 0.9.
    """
    ## Get data
    reader = FastaUtility()
    Xtrain, Ytrain = reader.read_data(args.train)
    Xtest, Ytest = reader.read_data(args.test)
    Ytest = np.array(Ytest).reshape(-1, 1)

    ## Compute kernel matrix
    fastsk = FastSK(g=10, m=6, t=1, approx=True)
    fastsk.compute_kernel(Xtrain, Xtest)
    # From here on Xtrain / Xtest hold kernel matrices, not raw sequences.
    # Ytrain from the first read is still valid, so the training file is
    # not read a second time.
    Xtrain = fastsk.get_train_kernel()
    Xtest = fastsk.get_test_kernel()

    ## Train a linear SVM
    svm = LinearSVC(C=1)
    clf = CalibratedClassifierCV(svm, cv=5).fit(Xtrain, Ytrain)

    ## Evaluate
    acc = clf.score(Xtest, Ytest)
    probs = clf.predict_proba(Xtest)[:, 1]
    auc = roc_auc_score(Ytest, probs)
    print("Linear SVM:\n\tAcc = {}, AUC = {}".format(acc, auc))

    # Raise explicitly instead of using `assert` so the check still runs
    # under `python -O`; `not (auc >= 0.9)` also rejects a NaN score.
    if not auc >= 0.9:
        raise AssertionError(
            "AUC is not correct. Should be >= 0.9. Received: {}".format(auc)
        )
def train_and_test(self, g, m, t, approx, I=100, delta=0.025, skip_variance=False):
    """Compute the FastSK kernel, fit a cross-validated Lasso and score it.

    The kernel is computed over ``self.train_seq`` / ``self.test_seq``. The
    test and train kernel matrices are converted to 2-D numpy arrays (one
    row per sample) and stored in ``self.Xtest`` / ``self.Xtrain``.

    Args:
        g, m, approx, delta, skip_variance: forwarded unchanged to the
            ``FastSK`` constructor.
        t: forwarded to ``FastSK`` and also used as ``n_jobs`` for
            ``LassoCV``.
        I: forwarded to ``FastSK`` as ``max_iters``.

    Returns:
        The value of ``model.score(self.Xtest, self.Ytest)`` for the fitted
        ``LassoCV`` model.
    """
    fastsk_options = dict(
        g=g,
        m=m,
        t=t,
        approx=approx,
        max_iters=I,
        delta=delta,
        skip_variance=skip_variance,
    )
    fastsk = FastSK(**fastsk_options)
    fastsk.compute_kernel(self.train_seq, self.test_seq)

    # Flatten everything after the first axis so each sample is one row.
    self.Xtest = fastsk.get_test_kernel()
    self.Xtest = np.array(self.Xtest).reshape(len(self.Xtest), -1)
    self.Xtrain = fastsk.get_train_kernel()
    self.Xtrain = np.array(self.Xtrain).reshape(len(self.Xtrain), -1)

    # Can replace Lasso with alternative regression approaches such as SVR
    regressor = LassoCV(cv=5, n_jobs=t, random_state=293)
    fitted = regressor.fit(self.Xtrain, self.Ytrain)
    return fitted.score(self.Xtest, self.Ytest)
if __name__ == "__main__":
    # Command-line interface: both FASTA paths default to the EP300 dataset.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--train",
        default="../data/EP300.train.fasta",
        help="training sequences file",
    )
    parser.add_argument(
        "--test",
        default="../data/EP300.test.fasta",
        help="test sequences file",
    )
    args = parser.parse_args()

    ## Compute kernel matrix
    # compute_kernel is handed the FASTA file paths directly here.
    fastsk = FastSK(g=10, m=6, t=1, approx=True)
    fastsk.compute_kernel(args.train, args.test)
    Xtrain = fastsk.get_train_kernel()
    Xtest = fastsk.get_test_kernel()

    # The training file is read separately only to obtain the labels.
    reader = FastaUtility()
    Xseq, Ytrain = reader.read_data(args.train)

    ## Use linear SVM
    svm = LinearSVC(C=1)
    clf = CalibratedClassifierCV(svm, cv=5).fit(Xtrain, Ytrain)

    ## Evaluate
    # A fresh reader loads the test labels; Xseq is overwritten.
    reader = FastaUtility()
    Xseq, Ytest = reader.read_data(args.test)
args.t, args.approx, args.I, args.delta, ) skip_variance = args.skip_variance ### Read the data reader = FastaUtility() Xtrain, Ytrain = reader.read_data(train_file) Xtest, Ytest = reader.read_data(test_file) Ytest = np.array(Ytest).reshape(-1, 1) ### Compute the fastsk kernel start = time.time() fastsk = FastSK( g=g, m=m, t=t, approx=approx, max_iters=I, delta=d, skip_variance=skip_variance ) fastsk.compute_kernel(Xtrain, Xtest) end = time.time() print("Kernel computation time: ", end - start) Xtrain = fastsk.get_train_kernel() Xtest = fastsk.get_test_kernel() ### Use linear SVM svm = LinearSVC(C=C) clf = CalibratedClassifierCV(svm, cv=5).fit(Xtrain, Ytrain) acc, auc = evaluate_clf(clf, Xtest, Ytest) print("Linear SVM:\n\tAcc = {}, AUC = {}".format(acc, auc))
import glob
import os

from fastsk import FastSK

# Collect the path prefix of every dataset that has a training file, e.g.
# "../../data/EP300" for "../../data/EP300.train.fasta".
datasets = []
for filepath in glob.iglob('../../data/*.train.fasta'):
    # The glob pattern guarantees the suffix, so strip it by length rather
    # than searching for the first ".train" anywhere in the path.
    s = filepath[:-len(".train.fasta")]
    datasets.append(s)
    #print(s)

for s in datasets:
    # Bare dataset name (no directory part); only needed by the optional
    # confirmation prompt below.
    name = os.path.basename(s)

    # Uncomment for confirmation for each dataset train
    # answer = input("about to train on " + name + " dataset.\ny/n?")
    # if answer.lower() != "y":
    #     continue

    fastsk = FastSK(g=7, m=3, t=16, approx=True, max_iters=75)
    # Train and test files share the same prefix, so derive both from `s`
    # instead of rebuilding the test path from a partially stripped name.
    fastsk.compute_kernel(Xtrain=s + ".train.fasta", Xtest=s + ".test.fasta")
    fastsk.fit()
    fastsk.score(metric='auc')