def __init__(self, training_data):
    """
        Set up the group-decomposition helpers and keep the training data.

        Arguments:
            training_data - object exposing 'cids', 'S', 'dG0' and 'weight';
                            'dG0' and 'weight' are stored as transposed
                            numpy matrices (train_b and train_w).

        The group matrix (train_G), both joined stoichiometric matrices and
        the fitted parameters are only initialized to None here.
    """
    # Helpers for looking up compounds and decomposing them into groups.
    self.ccache = CompoundCacher.getInstance()
    groups = init_groups_data()
    self.groups_data = groups
    self.inchi2gv = InChI2GroupVector(groups)
    self.group_names = groups.GetGroupNames()

    # Raw training data.
    self.train_cids = training_data.cids
    self.train_S = training_data.S
    dG0_row = np.matrix(training_data.dG0)
    self.train_b = dG0_row.T
    weight_row = np.matrix(training_data.weight)
    self.train_w = weight_row.T

    # Placeholders; nothing is computed for them in this method.
    self.train_G = self.train_S_joined = self.model_S_joined = self.params = None
def __init__(self, training_data=None):
    """
        Store the training data and create the group-decomposition helpers.

        Arguments:
            training_data - object exposing 'cids', 'S', 'dG0' and 'weight'.
                            When None, a default TrainingData() is created.
    """
    data = TrainingData() if training_data is None else training_data

    # Two independent copies of the compound ID list.
    self.train_cids = list(data.cids)
    self.cids_joined = list(data.cids)

    # Both "joined" attributes refer to the same np.matrix object built
    # from the training stoichiometric matrix.
    stoichiometry = data.S
    self.train_S = stoichiometry
    self.model_S_joined = self.train_S_joined = np.matrix(stoichiometry)

    dG0_row = np.matrix(data.dG0)
    self.train_b = dG0_row.T
    weight_row = np.matrix(data.weight)
    self.train_w = weight_row.T

    # Placeholders; nothing is computed for them in this method.
    self.train_G = self.params = None

    self.ccache = CompoundCacher()
    groups = inchi2gv.init_groups_data()
    self.groups_data = groups
    self.decomposer = inchi2gv.InChIDecomposer(groups)
    names = groups.GetGroupNames()
    self.group_names = names

    # Number of compounds and number of groups.
    self.Nc = len(self.cids_joined)
    self.Ng = len(names)
# Script: decompose ATP into groups and print the SMILES strings of two
# cached compounds.
import sys
sys.path.append('../python')  # must run before the project imports below
import inchi2gv
from compound_cacher import CompoundCacher
from molecule import Molecule

#logger = logging.getLogger('')
#logger.setLevel(logging.DEBUG)

ccache = CompoundCacher('../cache/compounds.json')
groups_data = inchi2gv.init_groups_data()
group_list = groups_data.GetGroupNames()  # not used in the visible part of this script
group_names = groups_data.GetGroupNames()
decomposer = inchi2gv.InChIDecomposer(groups_data)

# test the decomposition of ATP into groups
ATP_inchi = ccache.get_compound('C00002').inchi
group_def = decomposer.inchi_to_groupvec(ATP_inchi)
for j, group_name in enumerate(group_names):
    if group_def[j] != 0:
        # A single pre-formatted argument keeps print() valid on both
        # Python 2 and Python 3. The two spaces reproduce the separator
        # that the old "print a, b" statement inserted before ' x %d'.
        print('%s  x %d' % (group_name, group_def[j]))

patterns = ['c~[O;+0]', 'c~[O;+1]', 'c~[n;+1]~c', 'c~[n;+0]~c', 'c~[n;-1]~c']  # not used in the visible part of this script

for cid in ['C00255', 'C01007']:
    comp = ccache.get_compound(cid)
    # 50 dashes, the separator space of the old print statement, then the
    # compound ID on its own line.
    print('%s \n%s' % ('-' * 50, cid))
    inchi = comp.inchi
    mol = Molecule.FromInChI(inchi)
    print(mol.ToSmiles())
# Test compound_cacher.py: the cached entry for C00002 must carry the
# expected pH 7 SMILES string for ATP.
try:
    from compound_cacher import CompoundCacher
    ccache = CompoundCacher()
    atp_comp = ccache.get_compound('C00002')
    assert(smiles_ATP_pH7 == atp_comp.smiles_pH7)
except AssertionError:
    sys.stderr.write('Internal Error: the SMILES string for ATP is wrong.\n')
    err_num += 1
except Exception as e:
    # Terminate the message with a newline like the other diagnostics, so
    # the next message does not run into this one.
    sys.stderr.write('Error using Compound Cacher: ' + str(e) + '\n')
    err_num += 1

# Test inchi2gv.py
try:
    import inchi2gv
    groups_data = inchi2gv.init_groups_data()
    decomposer = inchi2gv.InChIDecomposer(groups_data)
    groupvec1 = decomposer.inchi_to_groupvec(inchi_ATP)
    groupvec2 = decomposer.smiles_to_groupvec(smiles_ATP_pH7)
    print(groupvec1)
    print(groupvec2)
    # The loop below used to read 'groupvec', a name that is never assigned
    # in this block, which raises NameError. Both decompositions are now
    # compared against the expected group counts.
    # NOTE(review): confirm that the InChI-based and the SMILES-based vector
    # are both expected to match ATP_group_dict.
    for groupvec in (groupvec1, groupvec2):
        for group_ind, group_count in enumerate(groupvec.Flatten()):
            # Groups missing from ATP_group_dict are expected to have count 0.
            assert(ATP_group_dict.get(group_ind, 0) == group_count)
except ImportError:
    sys.stderr.write('Cannot import the python script inchi2gv. Make sure the file '
                     'inchi2gv.py is located in the folder '
                     '"component-contribution/python/".\n')
    err_num += 1
except inchi2gv.GroupDecompositionError as e:
    sys.stderr.write('Internal Error: cannot decompose the compound ATP.\n')
    # Count this failure like every other handler in this block does.
    err_num += 1
# Script: for two KEGG compounds, print the SMILES string, the matches of
# one SMARTS pattern and the group vector (or the decomposition error).
import sys
import logging
sys.path.append('../python')  # must run before the project imports below
from compound import Compound
from inchi2gv import init_groups_data, InChI2GroupVector, GroupDecompositionError
from compound_cacher import CompoundCacher
from molecule import Molecule

#logger = logging.getLogger('')
#logger.setLevel(logging.DEBUG)

ccache = CompoundCacher.getInstance('../cache/compounds.json')
groups_data = init_groups_data()
group_list = groups_data.GetGroupNames()  # not used in the visible part of this script
inchi2gv_converter = InChI2GroupVector(groups_data)

patterns = ['c~[O;+0]', 'c~[O;+1]', 'c~[n;+1]~c', 'c~[n;+0]~c', 'c~[n;-1]~c']  # not used in the visible part of this script

for cid in [255, 1007]:
    comp = ccache.get_kegg_compound(cid)
    # A single pre-formatted argument keeps print() valid on both Python 2
    # and Python 3. The space before '\n' reproduces the separator that
    # the old "print a, b" statement inserted between its two items.
    print('%s \nC%05d' % ('-' * 50, cid))
    inchi = comp.inchi
    mol = Molecule.FromInChI(inchi)
    print(mol.ToSmiles())
    print(mol.FindSmarts("c~[n;+1]~c"))
    # NOTE(review): the decomposition is placed inside the loop because it
    # uses the per-compound 'inchi'; confirm against the original layout.
    try:
        groupvec = inchi2gv_converter.InChI2GroupVector(inchi)
    except GroupDecompositionError as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write(e.GetDebugTable())
    else:
        sys.stdout.write(str(groupvec) + '\n')