def __init__(self, training_data):
    """
    Store the training data and create the group-decomposition helpers.

    The compound cache singleton, the groups data, the InChI-to-group-vector
    converter and the list of group names are set up first. The `cids` and
    `S` attributes of `training_data` are then stored as-is, while `dG0`
    and `weight` are converted with np.matrix and transposed.

    `train_G`, `train_S_joined`, `model_S_joined` and `params` are only
    declared here; all four start out as None.
    """
    self.ccache = CompoundCacher.getInstance()

    # One groups-data object is shared by the converter and the name list.
    groups = init_groups_data()
    self.groups_data = groups
    self.inchi2gv = InChI2GroupVector(groups)
    self.group_names = groups.GetGroupNames()

    self.train_cids, self.train_S = training_data.cids, training_data.S

    dG0_matrix = np.matrix(training_data.dG0)
    self.train_b = dG0_matrix.T
    weight_matrix = np.matrix(training_data.weight)
    self.train_w = weight_matrix.T

    # Placeholders; nothing in this method fills them in.
    self.train_G = self.train_S_joined = None
    self.model_S_joined = self.params = None
    def __init__(self, training_data=None):
        """
        Store the training data and create the group-decomposition helpers.

        When `training_data` is None a default `TrainingData()` is built.
        `train_cids` and `cids_joined` are two independent list copies of
        the training compound IDs. `model_S_joined` is `np.matrix(train_S)`
        and `train_S_joined` refers to that very same matrix object.
        `dG0` and `weight` are converted with np.matrix and transposed.

        `train_G` and `params` start out as None. `Nc` and `Ng` hold the
        number of joined compound IDs and of group names respectively.
        """
        training_data = TrainingData() if training_data is None else training_data

        cids = training_data.cids
        self.train_cids = list(cids)
        self.cids_joined = list(cids)

        self.train_S = training_data.S
        # Both "joined" attributes deliberately point at one matrix object.
        self.model_S_joined = self.train_S_joined = np.matrix(self.train_S)

        dG0_matrix = np.matrix(training_data.dG0)
        self.train_b = dG0_matrix.T
        weight_matrix = np.matrix(training_data.weight)
        self.train_w = weight_matrix.T
        self.train_G = self.params = None

        self.ccache = CompoundCacher()
        groups = inchi2gv.init_groups_data()
        self.groups_data = groups
        self.decomposer = inchi2gv.InChIDecomposer(groups)
        self.group_names = groups.GetGroupNames()

        self.Nc, self.Ng = len(self.cids_joined), len(self.group_names)
# Example #3
# 0
import sys

sys.path.append('../python')
import inchi2gv
from compound_cacher import CompoundCacher
from molecule import Molecule

#logger = logging.getLogger('')
#logger.setLevel(logging.DEBUG)
# Load the compound cache from its JSON file and build an InChI decomposer
# from the groups data returned by inchi2gv.init_groups_data().
ccache = CompoundCacher('../cache/compounds.json')
groups_data = inchi2gv.init_groups_data()
group_list = groups_data.GetGroupNames()
group_names = groups_data.GetGroupNames()
decomposer = inchi2gv.InChIDecomposer(groups_data)

# test the decomposition of ATP into groups
ATP_inchi = ccache.get_compound('C00002').inchi
group_def = decomposer.inchi_to_groupvec(ATP_inchi)
for j, group_name in enumerate(group_names):
    # Only report the groups that actually occur in the decomposition.
    if group_def[j] != 0:
        # A single pre-formatted argument parses on both Python 2 and
        # Python 3 and prints exactly what the old print statement printed
        # (name, separator space, then ' x <count>').
        print('%s  x %d' % (group_name, group_def[j]))

patterns = ['c~[O;+0]', 'c~[O;+1]', 'c~[n;+1]~c', 'c~[n;+0]~c', 'c~[n;-1]~c']

# Print a separator line, the compound ID and the SMILES string of each
# compound, converting its cached InChI through Molecule.
for cid in ['C00255', 'C01007']:
    comp = ccache.get_compound(cid)
    # Same bytes as the former `print "-" * 50, '\n%s' % cid`.
    print('%s \n%s' % ('-' * 50, cid))
    inchi = comp.inchi
    mol = Molecule.FromInChI(inchi)
    print(mol.ToSmiles())
# Test compound_cacher.py: the cached pH 7 SMILES of ATP ('C00002') must
# equal the reference string smiles_ATP_pH7.
try:
    from compound_cacher import CompoundCacher
    ccache = CompoundCacher()
    atp_comp = ccache.get_compound('C00002')
    smiles_matches = (smiles_ATP_pH7 == atp_comp.smiles_pH7)
except Exception as e:
    # Trailing newline added so this message does not run into whatever is
    # written to stderr next (every other message here ends with '\n').
    sys.stderr.write('Error using Compound Cacher: ' + str(e) + '\n')
    err_num += 1
else:
    # Explicit comparison instead of `assert`: assert statements are
    # stripped under `python -O`, which would silently skip this check.
    if not smiles_matches:
        sys.stderr.write('Internal Error: the SMILES string for ATP is wrong.\n')
        err_num += 1

# Test inchi2gv.py: decompose ATP from both its InChI and its pH 7 SMILES,
# print the two group vectors and compare the group counts against
# ATP_group_dict (groups missing from the dict are expected to be 0).
try:
    import inchi2gv
    groups_data = inchi2gv.init_groups_data()
    decomposer = inchi2gv.InChIDecomposer(groups_data)
    groupvec1 = decomposer.inchi_to_groupvec(inchi_ATP)
    groupvec2 = decomposer.smiles_to_groupvec(smiles_ATP_pH7)
    # Parenthesised single argument: valid on Python 2 and Python 3 alike.
    print(groupvec1)
    print(groupvec2)
    # The original loop iterated over an undefined name `groupvec`, which
    # raised an uncaught NameError.
    # NOTE(review): the SMILES-derived vector is assumed to be the one that
    # ATP_group_dict describes -- confirm against the reference data.
    for group_ind, group_count in enumerate(groupvec2.Flatten()):
        assert(ATP_group_dict.get(group_ind, 0) == group_count)

except ImportError:
    sys.stderr.write('Cannot import the python script inchi2gv. Make sure the file '
                     'inchi2gv.py is located in the folder '
                     '"component-contribution/python/".\n')
    err_num += 1
except inchi2gv.GroupDecompositionError as e:
    sys.stderr.write('Internal Error: cannot decompose the compound ATP.\n')
    # Count this failure like every other failure handled in this script.
    err_num += 1
import sys, logging
sys.path.append('../python')
from compound import Compound
from inchi2gv import init_groups_data, InChI2GroupVector, GroupDecompositionError
from compound_cacher import CompoundCacher
from molecule import Molecule

#logger = logging.getLogger('')
#logger.setLevel(logging.DEBUG)
# Get the compound cache singleton backed by the JSON file and build the
# InChI-to-group-vector converter from the groups data.
ccache = CompoundCacher.getInstance('../cache/compounds.json')
groups_data = init_groups_data()
group_list = groups_data.GetGroupNames()
inchi2gv_converter = InChI2GroupVector(groups_data)

patterns = ['c~[O;+0]', 'c~[O;+1]', 'c~[n;+1]~c', 'c~[n;+0]~c', 'c~[n;-1]~c']

# For each KEGG compound number: print a separator and the zero-padded ID,
# the SMILES string, the matches of one SMARTS pattern, and finally the
# group vector (or the decomposition error with its debug table).
for cid in [255, 1007]:
    comp = ccache.get_kegg_compound(cid)
    # Single pre-formatted argument: parses on Python 2 and Python 3 and
    # emits the same bytes as the former `print "-"*50, '\nC%05d' % cid`.
    print('%s \nC%05d' % ('-' * 50, cid))
    inchi = comp.inchi
    mol = Molecule.FromInChI(inchi)
    print(mol.ToSmiles())

    print(mol.FindSmarts("c~[n;+1]~c"))

    try:
        groupvec = inchi2gv_converter.InChI2GroupVector(inchi)
        sys.stdout.write(str(groupvec) + '\n')
    except GroupDecompositionError as e:
        # Report the failure on stderr and carry on with the next compound.
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write(e.GetDebugTable())