# Fit a DeepTCR_SS regression model for each antigen column of `df` using
# 5-fold cross-validation, pool the held-out predictions across antigens, and
# write them to 'Regression_Results.csv'.
DTCRS = DeepTCR_SS('reg', device='/gpu:2')
p = Pool(40)

# Get alpha/beta sequences
alpha = np.asarray(df['alpha'].tolist())
beta = np.asarray(df['beta'].tolist())

y_pred = []
y_test = []
antigen = []

# Iterate through all antigens. Columns at positions 0 and 1 are skipped
# (presumably the 'alpha'/'beta' sequence columns -- confirm against the
# layout of `df`).
try:
    for i in range(2, df.columns.shape[0]):
        sel = df.iloc[:, i]
        print(sel.name)

        # Regression target: log2(value + 1) of the antigen column.
        Y = np.log2(np.asarray(sel.tolist()) + 1)
        DTCRS.Load_Data(alpha_sequences=alpha, beta_sequences=beta, Y=Y, p=p)
        DTCRS.K_Fold_CrossVal(split_by_sample=False, folds=5)

        y_pred.append(DTCRS.y_pred)
        y_test.append(DTCRS.y_test)
        # One antigen label per prediction so the rows line up after stacking.
        antigen.append([sel.name] * len(DTCRS.y_pred))
finally:
    # The pool is only handed to Load_Data inside the loop; release its
    # worker processes as soon as the loop ends, even if an iteration raised.
    p.close()
    p.join()

antigen = np.hstack(antigen)
# NOTE(review): assigning these stacked arrays as single DataFrame columns
# assumes every DTCRS.y_pred / DTCRS.y_test is a column vector -- confirm.
y_pred = np.vstack(y_pred)
y_test = np.vstack(y_test)

# Save Data
df_out = pd.DataFrame()
df_out['Antigen'] = antigen
df_out['Y_Pred'] = y_pred
df_out['Y_Test'] = y_test
df_out.to_csv('Regression_Results.csv', index=False)
# Keep, for every sample, only the entries whose predicted value exceeds
# `thresh`, gathering the surviving sequences, class labels and counts.
seq_train, label_train, count_train = [], [], []
for s, seq_cl, p, c in zip(sequences, seq_class_labels, predicted, counts):
    sel_idx = p > thresh
    for kept, values in (
        (seq_train, s),
        (label_train, seq_cl),
        (count_train, c),
    ):
        kept.append(values[sel_idx])

# Flatten the per-sample pieces into single arrays.
seq_train = np.hstack(seq_train)
label_train = np.hstack(label_train)
count_train = np.hstack(count_train)

# Train Sequence Classifier
DTCR = DeepTCR_SS('tw10_seq', device=gpu)
DTCR.Load_Data(
    beta_sequences=seq_train,
    class_labels=label_train,
)
DTCR.Monte_Carlo_CrossVal(
    folds=folds,
    graph_seed=graph_seed,
    seeds=seeds,
    convergence='training',
)
y_pred = DTCR.predicted
y_test = DTCR.Y

# Set up the ROC figure: axis limits and labels.
plt.figure(figsize=(6, 5))
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')

# Per class: ROC curve points and AUC from column `ii` of the true and
# predicted arrays, plus a "<class> = <auc>" label string.
for ii, cl in enumerate(DTCR.lb.classes_):
    truth_col = y_test[:, ii]
    score_col = y_pred[:, ii]
    fpr, tpr, _ = roc_curve(truth_col, score_col)
    roc_score = roc_auc_score(truth_col, score_col)
    label = '%s = %0.3f' % (cl, roc_score)