예제 #1
0
class NP_Epitope_Prediction_Tests_Fitted(unittest.TestCase):
	"""
	Basic test for the NP_Epitope_Prediction class.

	If this test runs successfully, NP_Epitope_Prediction is up and
	running and can be used. Only the already fitted classifier is tested.

	Assumes that the methods:
		* fit_chain
		* update_cluster_descri
		* update_onto_mapping

	did run without errors.
	"""

	def set_up(self):
		"""Create the predictor under test from the "ML_data" storage directory.

		Kept under its original name for callers that invoke it directly;
		unittest itself reaches it through ``setUp``.
		"""
		TEST_DATA_PATH = "ML_data"
		self.NP_E_Pred = NP_Epitope_Prediction(data_storage = TEST_DATA_PATH)

	def setUp(self):
		"""unittest fixture hook: build the predictor before every test method."""
		# unittest only calls the camelCase ``setUp``; delegating here means
		# individual tests no longer have to remember to call set_up() by hand.
		self.set_up()

	def test_json_output(self):
		"""Check the JSON rendering of a prediction against the stored reference.

		Runs ``prediction_chain`` for one fixed SMILES string, converts the
		result with ``Results_To_Json`` and compares the text with the content
		of ``test_data/example_output.json`` (path relative to the working
		directory).
		"""
		test_smiles = "Cc1onc(-c2c(F)cccc2Cl)c1C(=O)N[C@H](C(=O)NCCCC[C@H](N)C(=O)O)[C@@H]1N[C@@H](C(=O)O)C(C)(C)S1"

		results = self.NP_E_Pred.prediction_chain(test_smiles, 
			only_epitopes = False, 
			compute_k_best = 5, 
			show_k_best = None, 
			sort_order = "E")

		json_text_result = Results_To_Json(results)

		with open(os.path.join("test_data", "example_output.json"),"r") as json_file:
			json_test_expected = json_file.read()

		# Exact text comparison: any formatting difference counts as a failure.
		self.assertEqual(json_text_result, json_test_expected)
예제 #2
0
# Data directory in which the new data is generated. Per the original
# author's notes, the update keeps the cluster model (cluster_clf.pickle)
# as it is, together with the ontology mapping of the clusters and the
# cluster descriptions; those two can be refreshed at any time with the
# update_onto_mapping and update_cluster_descri methods of
# NP_Epitope_Prediction.
DATA_PATH = "ML_data_updated"

# Folder holding the input data (IEDB csv and ChEBI sdf).
UPDATE_PATH = os.path.join(DATA_PATH, 'epitope_update_input', '10-29-2020')

# The three input files, followed by the csv that NP_Epitope_Data_Conversion
# generates and that is then passed to update_chain.
SDF_PATH, B_CELL, T_CELL, SMILES_PATH = (
    os.path.join(UPDATE_PATH, file_name)
    for file_name in (
        "ChEBI_lite_3star.sdf",
        "epitope_table_b_cell_pos.csv",
        "epitope_table_t_cell_pos.csv",
        "chebi_san_assigned.csv",
    )
)

# Step 1: convert the raw input files into training data.
print("Convert input data to NP_Epitope_Prediction training data")
converter = NP_Epitope_Data_Conversion(DATA_PATH)
converter.create_ML_data(SDF_PATH, B_CELL, T_CELL, skip_sdf=False)

# Step 2: create the predictor on the same data directory.
print("Initiate NP_Epitope_Prediction class")
predictor = NP_Epitope_Prediction(data_storage=DATA_PATH)

# Step 3: run the update chain on the generated csv.
print("Update the predictor")
predictor.update_chain(SMILES_PATH)
예제 #3
0
from epitope_prediction import NP_Epitope_Prediction, Molecule_Group_Classifier, Epitope_Predictor
from output_utils import Results_To_Html, Results_To_Json

import pandas as pd
import os

#############################
#Run specific methods
#############################

# Directory containing this script; the data directory is resolved relative
# to it rather than to the current working directory.
FILE_PATH = os.path.split(os.path.abspath(__file__))[0]
DATA_PATH = os.path.join(FILE_PATH, "ML_data_updated")

# Build the predictor on that directory and call its
# update_epitope_prediction_info method.
predictor = NP_Epitope_Prediction(data_storage=DATA_PATH)
predictor.update_epitope_prediction_info()

#############################
#Retrain the classifier
#############################

# S_PATH = "ML_data_tests/epitope_update/11.05.2020"
# SMILES_PATH = os.path.join(S_PATH, "chebi_san_assigned.csv")

# #load data
# smiles_df = pd.read_csv(SMILES_PATH, index_col = "index")

# #print(smiles_df)
# DATA_PATH = "ML_data_tests"
# predictor = NP_Epitope_Prediction(data_storage = DATA_PATH)

# #refits the entire predictor, need reassignment of molecular groups and ontos
from epitope_prediction import NP_Epitope_Prediction, Molecule_Group_Classifier, Epitope_Predictor
from output_utils import Results_To_Html, Results_To_Json, Results_To_Django

# import sys
# sys.modules['Molecule_Group_Classifier'] = Molecule_Group_Classifier
# sys.modules['Epitope_Predictor'] = Epitope_Predictor
# sys.modules['NP_Epitope_Prediction'] = NP_Epitope_Prediction

import os
import pandas as pd

# Directory containing this module; the data directory sits next to it.
FILE_PATH = os.path.split(os.path.abspath(__file__))[0]
DATA_PATH = os.path.join(FILE_PATH, "ML_data_updated")

# Module-level predictor instance, created once at import time and used by
# prediction2django.
predictor = NP_Epitope_Prediction(data_storage=DATA_PATH)


def prediction2django(smiles):
    """Run a prediction for *smiles* and convert it with Results_To_Django.

    The module-level ``predictor`` is called through ``prediction_chain``
    with ``only_epitopes=True`` and ``sort_order="E"``; whatever
    ``Results_To_Django`` produces from that result is returned.
    """
    raw_results = predictor.prediction_chain(
        smiles,
        only_epitopes=True,
        sort_order="E",
    )
    return Results_To_Django(raw_results)


##################
# test
##################
예제 #5
0
from epitope_prediction import NP_Epitope_Prediction, Molecule_Group_Classifier, Epitope_Predictor
from output_utils import Results_To_Html, Results_To_Json

import pandas as pd
import os

# Directory used both for the training csv and as the predictor's data storage.
DATA_PATH = "ML_data"

# Load the training table; its "index" column becomes the DataFrame index.
SMILES_PATH = os.path.join(DATA_PATH, "chebi_san_assigned.csv")
smiles_df = pd.read_csv(SMILES_PATH, index_col="index")

#print(smiles_df)

# Fit the predictor on the loaded table.
predictor = NP_Epitope_Prediction(data_storage=DATA_PATH)
predictor.fit_chain(smiles_df)

# Stop once fitting is done. `exit()` is a helper injected by the `site`
# module for interactive sessions and is not guaranteed to exist (for example
# under `python -S`), so SystemExit is raised directly instead.
raise SystemExit

#############################
#Run a prediction
#############################

# Example input molecules as SMILES strings. Each assignment overwrites the
# previous one, so only the last value is in effect. NOTE: the script ends
# unconditionally before this section, so these lines are currently not
# executed.

smiles = "CC(=O)NC1[C@@H](OC(C(C1O)O)CO)O"

smiles = "Cc1onc(-c2c(F)cccc2Cl)c1C(=O)N[C@H](C(=O)NCCCC[C@H](N)C(=O)O)[C@@H]1N[C@@H](C(=O)O)C(C)(C)S1"

smiles = "COC(=O)CCc1c(C)c2=CC3=[N]4C(=Cc5c(C=C)c(C)c6C=C7C(C)=C(CCC(O)=O)C8=[N]7[Mg]4(n56)n2c1=C8)C(C)=C3C=C"

# Raw string: this SMILES contains literal backslashes (`\C`, `\[`), which are
# invalid escape sequences in a normal string literal and trigger a
# SyntaxWarning on recent Python versions. The resulting value is unchanged.
smiles = r"CCCCCCCCCCCCC\C=C\[C@@H](O)[C@H](CO[C@@H]1O[C@H](CO)[C@@H](O[C@@H]2O[C@H](CO)[C@H](O[C@@H]3O[C@H](CO)[C@H](O)[C@H](O[C@@H]4O[C@H](CO)[C@H](O)[C@H](O[C@@]5(C[C@H](O)[C@@H](NC(C)=O)[C@@H](O5)[C@H](O)[C@@H](CO)O[C@@]5(C[C@H](O)[C@@H](NC(C)=O)[C@@H](O5)[C@H](O)[C@H](O)CO)C(O)=O)C(O)=O)[C@H]4O)[C@H]3NC(C)=O)[C@H](O[C@@]3(C[C@H](O)[C@@H](NC(C)=O)[C@@H](O3)[C@H](O)[C@@H](CO)O[C@@]3(C[C@H](O)[C@@H](NC(C)=O)[C@@H](O3)[C@H](O)[C@H](O)CO)C(O)=O)C(O)=O)[C@H]2O)[C@H](O)[C@H]1O)NC([*])=O"
예제 #6
0
	def set_up(self):
		"""Create the predictor under test from the "ML_data" storage directory."""
		self.NP_E_Pred = NP_Epitope_Prediction(data_storage="ML_data")