class NP_Epitope_Prediction_Tests_Fitted(unittest.TestCase):
    """
    Basic test for the NP_Epitope_Prediction class.

    If this test passes, the NP_Epitope_Prediction is up and running and
    can be used. Only the already fitted classifier is tested, assuming
    that the methods:
        * fit_chain
        * update_cluster_descri
        * update_onto_mapping
    ran without errors.
    """

    def set_up(self):
        """Build the predictor from the ``ML_data`` storage directory.

        The instance is stored on ``self.NP_E_Pred``. Kept under its
        original name so existing callers continue to work.
        """
        TEST_DATA_PATH = "ML_data"
        self.NP_E_Pred = NP_Epitope_Prediction(data_storage=TEST_DATA_PATH)

    def setUp(self):
        """unittest fixture hook: create the predictor before every test."""
        # Delegating to set_up() means each test method gets the fixture
        # automatically instead of having to remember to call it by hand.
        self.set_up()

    def test_json_output(self):
        """Compare the JSON rendering of a prediction with the stored example."""
        test_smiles = "Cc1onc(-c2c(F)cccc2Cl)c1C(=O)N[C@H](C(=O)NCCCC[C@H](N)C(=O)O)[C@@H]1N[C@@H](C(=O)O)C(C)(C)S1"
        results = self.NP_E_Pred.prediction_chain(
            test_smiles,
            only_epitopes=False,
            compute_k_best=5,
            show_k_best=None,
            sort_order="E",
        )
        json_text_result = Results_To_Json(results)

        # Read the fixture with an explicit encoding so the comparison does
        # not depend on the platform's default locale encoding.
        expected_path = os.path.join("test_data", "example_output.json")
        with open(expected_path, "r", encoding="utf-8") as json_file:
            json_test_expected = json_file.read()

        self.assertEqual(json_text_result, json_test_expected)
# Generate the new training data and update the predictor.
# The update function keeps the cluster model (cluster_clf.pickle) as it is.
# The ontology mapping of the clusters and the cluster descriptions are kept
# as well; they can be updated at any time with the update_onto_mapping and
# update_cluster_descri methods of NP_Epitope_Prediction.
DATA_PATH = "ML_data_updated"

# Directory holding the input data (IEDB csv and ChEBI sdf).
UPDATE_PATH = os.path.join(DATA_PATH, 'epitope_update_input', '10-29-2020')

# The three input data files, all located in UPDATE_PATH.
SDF_PATH, B_CELL, T_CELL = (
    os.path.join(UPDATE_PATH, file_name)
    for file_name in (
        "ChEBI_lite_3star.sdf",
        "epitope_table_b_cell_pos.csv",
        "epitope_table_t_cell_pos.csv",
    )
)

# NP_Epitope_Data_Conversion generates this file; it is then handed to
# update_chain below.
SMILES_PATH = os.path.join(UPDATE_PATH, "chebi_san_assigned.csv")

print("Convert input data to NP_Epitope_Prediction training data")
converter = NP_Epitope_Data_Conversion(DATA_PATH)
converter.create_ML_data(SDF_PATH, B_CELL, T_CELL, skip_sdf=False)

print("Initiate NP_Epitope_Prediction class")
predictor = NP_Epitope_Prediction(data_storage=DATA_PATH)

print("Update the predictor")
predictor.update_chain(SMILES_PATH)
from epitope_prediction import (
    NP_Epitope_Prediction,
    Molecule_Group_Classifier,
    Epitope_Predictor,
)
from output_utils import Results_To_Html, Results_To_Json
import pandas as pd
import os

# -----------------------------------------------------------
# Run specific methods
# -----------------------------------------------------------
# The data directory is resolved relative to this script's own location.
FILE_PATH = os.path.split(os.path.abspath(__file__))[0]
DATA_PATH = os.path.join(FILE_PATH, "ML_data_updated")

predictor = NP_Epitope_Prediction(data_storage=DATA_PATH)
predictor.update_epitope_prediction_info()

# -----------------------------------------------------------
# Retrain the classifier (currently disabled)
# -----------------------------------------------------------
# S_PATH = "ML_data_tests/epitope_update/11.05.2020"
# SMILES_PATH = os.path.join(S_PATH, "chebi_san_assigned.csv")
# #load data
# smiles_df = pd.read_csv(SMILES_PATH, index_col = "index")
# #print(smiles_df)
# DATA_PATH = "ML_data_tests"
# predictor = NP_Epitope_Prediction(data_storage = DATA_PATH)
# #refits the entire predictor, need reassignment of molecular groups and ontos
from epitope_prediction import (
    NP_Epitope_Prediction,
    Molecule_Group_Classifier,
    Epitope_Predictor,
)
from output_utils import Results_To_Html, Results_To_Json, Results_To_Django
# import sys
# sys.modules['Molecule_Group_Classifier'] = Molecule_Group_Classifier
# sys.modules['Epitope_Predictor'] = Epitope_Predictor
# sys.modules['NP_Epitope_Prediction'] = NP_Epitope_Prediction
import os
import pandas as pd

# The data directory is resolved relative to this module's own location.
FILE_PATH = os.path.split(os.path.abspath(__file__))[0]
DATA_PATH = os.path.join(FILE_PATH, "ML_data_updated")

# One predictor instance is created at import time and shared by every call.
predictor = NP_Epitope_Prediction(data_storage=DATA_PATH)


def prediction2django(smiles):
    """Run the shared predictor on *smiles* and convert the result for Django.

    *smiles* is handed unchanged to ``predictor.prediction_chain`` (with
    ``only_epitopes=True`` and ``sort_order="E"``); presumably a SMILES
    string -- confirm against the predictor's API. Returns whatever
    ``Results_To_Django`` produces for the prediction.
    """
    raw_prediction = predictor.prediction_chain(
        smiles,
        only_epitopes=True,
        sort_order="E",
    )
    return Results_To_Django(raw_prediction)


##################
# test
##################
from epitope_prediction import NP_Epitope_Prediction, Molecule_Group_Classifier, Epitope_Predictor
from output_utils import Results_To_Html, Results_To_Json
import pandas as pd
import os
import sys

DATA_PATH = "ML_data"

# Load the training data.
SMILES_PATH = os.path.join(DATA_PATH, "chebi_san_assigned.csv")
smiles_df = pd.read_csv(SMILES_PATH, index_col="index")
#print(smiles_df)

# Fit the predictor on the loaded data.
predictor = NP_Epitope_Prediction(data_storage=DATA_PATH)
predictor.fit_chain(smiles_df)

# Stop after fitting; everything below only runs once this line is removed.
# sys.exit() is used because the bare exit() helper is injected by the
# `site` module and is not guaranteed to be available.
sys.exit()

#############################
#Run a prediction
#############################
# Example inputs; each assignment overrides the previous one, so only the
# last one is in effect.
smiles = "CC(=O)NC1[C@@H](OC(C(C1O)O)CO)O"
smiles = "Cc1onc(-c2c(F)cccc2Cl)c1C(=O)N[C@H](C(=O)NCCCC[C@H](N)C(=O)O)[C@@H]1N[C@@H](C(=O)O)C(C)(C)S1"
smiles = "COC(=O)CCc1c(C)c2=CC3=[N]4C(=Cc5c(C=C)c(C)c6C=C7C(C)=C(CCC(O)=O)C8=[N]7[Mg]4(n56)n2c1=C8)C(C)=C3C=C"
# Raw string: this SMILES contains backslashes ("\C", "\["), which are
# invalid escape sequences in a normal string literal. The value is unchanged.
smiles = r"CCCCCCCCCCCCC\C=C\[C@@H](O)[C@H](CO[C@@H]1O[C@H](CO)[C@@H](O[C@@H]2O[C@H](CO)[C@H](O[C@@H]3O[C@H](CO)[C@H](O)[C@H](O[C@@H]4O[C@H](CO)[C@H](O)[C@H](O[C@@]5(C[C@H](O)[C@@H](NC(C)=O)[C@@H](O5)[C@H](O)[C@@H](CO)O[C@@]5(C[C@H](O)[C@@H](NC(C)=O)[C@@H](O5)[C@H](O)[C@H](O)CO)C(O)=O)C(O)=O)[C@H]4O)[C@H]3NC(C)=O)[C@H](O[C@@]3(C[C@H](O)[C@@H](NC(C)=O)[C@@H](O3)[C@H](O)[C@@H](CO)O[C@@]3(C[C@H](O)[C@@H](NC(C)=O)[C@@H](O3)[C@H](O)[C@H](O)CO)C(O)=O)C(O)=O)[C@H]2O)[C@H](O)[C@H]1O)NC([*])=O"
def set_up(self):
    """Create an NP_Epitope_Prediction backed by the ``ML_data`` directory.

    The new instance is stored on ``self.NP_E_Pred``.
    """
    storage_dir = "ML_data"
    predictor = NP_Epitope_Prediction(data_storage=storage_dir)
    self.NP_E_Pred = predictor