コード例 #1
0
ファイル: kegg_model.py プロジェクト: amyrbrown/assets
 def __init__(self, S, cids, rids=None):
     """
     Keep a stoichiometric matrix together with its row/column labels.

     Arguments:
         S    - object exposing a ``shape`` attribute (indexed as shape[0]
                and, when rids is given, shape[1])
         cids - sized sequence; its length must equal S.shape[0]
         rids - optional sized sequence; when given, its length must equal
                S.shape[1]

     Raises:
         AssertionError - when a label list does not match the
                          corresponding dimension of S
     """
     self.S, self.cids, self.rids = S, cids, rids

     # one label per row of S
     assert len(self.cids) == self.S.shape[0]

     # column labels are optional; only validate them when supplied
     if rids is not None:
         assert len(self.rids) == self.S.shape[1]

     # fetched last, so a failed size check never touches the cache
     self.ccache = CompoundCacher.getInstance()
コード例 #2
0
def main(fname, pH, I, T):
    """
    Print dG0 and its transformed value for every row of a TSV file.

    Each row of the tab-delimited file `fname` must carry, in column 0, a
    string containing 'C<digits>_10' and, in column 1, a value parseable as
    float (dG0). For every row the compound is looked up in the compound
    cache and the result of comp.transform_neutral(pH, I, T) is added to
    dG0. One line 'C%05d<TAB>dG0<TAB>dG0_prime' is printed per row. After
    all rows are processed, the cache's dump() method is called.

    Arguments:
        fname    - path of the tab-delimited input file
        pH, I, T - passed unchanged to transform_neutral()
                   (presumably pH, ionic strength and temperature - confirm)

    Raises:
        IndexError - when column 0 of a row does not match 'C<digits>_10'
                     or the row has fewer than two columns
        ValueError - when column 1 is not a valid float
    """
    ccache = CompoundCacher.getInstance()

    # Context manager: the file handle is released even if a row fails to
    # parse (the original left the file open).
    with open(fname, 'r') as tsv_file:
        for row in csv.reader(tsv_file, delimiter='\t'):
            cid = int(re.findall('C([0-9]+)_10', row[0])[0])
            dG0 = float(row[1])
            comp = ccache.get_kegg_compound(cid)
            dG0_prime = dG0 + comp.transform_neutral(pH, I, T)
            # Single-argument call form: same output under Python 2's print
            # statement and valid syntax under Python 3.
            print('C%05d\t%f\t%f' % (cid, dG0, dG0_prime))

    ccache.dump()
コード例 #3
0
    def __init__(self, training_data):
        """
            Prepare the helpers needed for group decomposition and keep a
            reference to the training data.

            Sets up:
                ccache        - the shared CompoundCacher instance
                groups_data   - groups loaded from GROUP_CSV
                                (transformed=False)
                inchi2gv      - InChI2GroupVector built on groups_data
                group_names   - result of groups_data.GetGroupNames()
                training_data - the object passed in, stored as-is
        """
        self.ccache = CompoundCacher.getInstance()

        groups = GroupsData.FromGroupsFile(GROUP_CSV, transformed=False)
        self.groups_data = groups
        self.inchi2gv = InChI2GroupVector(groups)
        self.group_names = groups.GetGroupNames()

        self.training_data = training_data
コード例 #4
0
 def __init__(self, training_data):
     """
         Prepare the helpers needed for group decomposition and keep a
         reference to the training data.

         Sets up:
             ccache        - the shared CompoundCacher instance
             groups_data   - groups loaded from GROUP_CSV
                             (transformed=False)
             inchi2gv      - InChI2GroupVector built on groups_data
             group_names   - result of groups_data.GetGroupNames()
             training_data - the object passed in, stored as-is
     """
     self.ccache = CompoundCacher.getInstance()

     groups = GroupsData.FromGroupsFile(GROUP_CSV, transformed=False)
     self.groups_data = groups
     self.inchi2gv = InChI2GroupVector(groups)
     self.group_names = groups.GetGroupNames()

     self.training_data = training_data
コード例 #5
0
    def __init__(self):
        """
        Load the training datasets and assemble them into arrays.

        Reads the 'TECRDB', 'FORMATION' and 'REDOX' entries of
        TrainingData.FNAME_DICT (each a (file name, weight) pair; file names
        are resolved relative to the directory containing this module),
        concatenates the resulting records and stores:

            S       - zero-initialised matrix with one row per compound ID
                      (sorted) and one column per record, filled with the
                      coefficients of each record's 'reaction'
            cids    - the sorted compound IDs labelling the rows of S
            cids_that_dont_decompose - second value returned by
                      read_formations()
            dG0_prime, T, I, pH, pMg, weight - arrays holding the "dG'0",
                      'T', 'I', 'pH', 'pMg' and 'weight' field of every
                      record

        Finally calls balance_reactions() with the indices of the records
        whose 'balance' field is truthy, then reverse_transform().
        """
        self.ccache = CompoundCacher.getInstance()

        base_path = os.path.dirname(os.path.realpath(__file__))

        fname, weight = TrainingData.FNAME_DICT['TECRDB']
        fname = os.path.join(base_path, fname)
        tecrdb_params = TrainingData.read_tecrdb(fname, weight)

        fname, weight = TrainingData.FNAME_DICT['FORMATION']
        fname = os.path.join(base_path, fname)
        formation_params, cids_that_dont_decompose = \
            TrainingData.read_formations(fname, weight)

        fname, weight = TrainingData.FNAME_DICT['REDOX']
        fname = os.path.join(base_path, fname)
        redox_params = TrainingData.read_redox(fname, weight)

        thermo_params = tecrdb_params + formation_params + redox_params

        # collect every compound ID in place instead of rebuilding the set
        # on each iteration
        cids = set()
        for d in thermo_params:
            cids.update(d['reaction'].keys())
        cids = sorted(cids)

        # convert the list of reactions in sparse notation into a full
        # stoichiometric matrix, where the rows (compounds) are according to
        # the CID list 'cids'.
        # A dict gives constant-time row lookup; list.index() would rescan
        # 'cids' for every single coefficient.
        row_of_cid = {cid: row for row, cid in enumerate(cids)}
        self.S = np.zeros((len(cids), len(thermo_params)))
        for k, d in enumerate(thermo_params):
            # iteritems() is kept as-is: the reaction type is defined
            # elsewhere and may not expose items().
            for cid, coeff in d['reaction'].iteritems():
                self.S[row_of_cid[cid], k] = coeff

        self.cids = cids
        self.cids_that_dont_decompose = cids_that_dont_decompose

        self.dG0_prime = np.array([d['dG\'0'] for d in thermo_params])
        self.T = np.array([d['T'] for d in thermo_params])
        self.I = np.array([d['I'] for d in thermo_params])
        self.pH = np.array([d['pH'] for d in thermo_params])
        self.pMg = np.array([d['pMg'] for d in thermo_params])
        self.weight = np.array([d['weight'] for d in thermo_params])
        rxn_inds_to_balance = [i for i, d in enumerate(thermo_params)
                               if d['balance']]

        self.balance_reactions(rxn_inds_to_balance)

        self.reverse_transform()
コード例 #6
0
    def __init__(self):
        """
        Load the training datasets and assemble them into arrays.

        First checks that every file named in TrainingData.FNAME_DICT
        exists, then reads the TECRDB, formation and redox datasets through
        the corresponding TrainingData.read_*() calls, concatenates the
        resulting records and stores:

            S       - zero-initialised matrix with one row per compound ID
                      (sorted) and one column per record, filled with the
                      coefficients of each record's 'reaction'
            cids    - the sorted compound IDs labelling the rows of S
            cids_that_dont_decompose - second value returned by
                      read_formations()
            dG0_prime, T, I, pH, pMg, weight - arrays holding the "dG'0",
                      'T', 'I', 'pH', 'pMg' and 'weight' field of every
                      record

        Finally calls balance_reactions() with the indices of the records
        whose 'balance' field is truthy, then reverse_transform().

        Raises:
            Exception - ('file not found: <name>') when a dataset file is
                        missing
        """
        self.ccache = CompoundCacher.getInstance()

        # verify that the files exist
        for fname, _ in TrainingData.FNAME_DICT.values():
            if not os.path.exists(fname):
                raise Exception('file not found: ' + fname)

        tecrdb_params = TrainingData.read_tecrdb()

        formation_params, cids_that_dont_decompose = \
            TrainingData.read_formations()

        redox_params = TrainingData.read_redox()

        thermo_params = tecrdb_params + formation_params + redox_params

        # collect every compound ID in place instead of rebuilding the set
        # on each iteration
        cids = set()
        for d in thermo_params:
            cids.update(d['reaction'].keys())
        cids = sorted(cids)

        # convert the list of reactions in sparse notation into a full
        # stoichiometric matrix, where the rows (compounds) are according to
        # the CID list 'cids'.
        # A dict gives constant-time row lookup; list.index() would rescan
        # 'cids' for every single coefficient.
        row_of_cid = {cid: row for row, cid in enumerate(cids)}
        self.S = np.zeros((len(cids), len(thermo_params)))
        for k, d in enumerate(thermo_params):
            # iteritems() is kept as-is: the reaction type is defined
            # elsewhere and may not expose items().
            for cid, coeff in d['reaction'].iteritems():
                self.S[row_of_cid[cid], k] = coeff

        self.cids = cids
        self.cids_that_dont_decompose = cids_that_dont_decompose

        self.dG0_prime = np.array([d['dG\'0'] for d in thermo_params])
        self.T = np.array([d['T'] for d in thermo_params])
        self.I = np.array([d['I'] for d in thermo_params])
        self.pH = np.array([d['pH'] for d in thermo_params])
        self.pMg = np.array([d['pMg'] for d in thermo_params])
        self.weight = np.array([d['weight'] for d in thermo_params])
        rxn_inds_to_balance = [
            i for i, d in enumerate(thermo_params) if d['balance']
        ]

        self.balance_reactions(rxn_inds_to_balance)

        self.reverse_transform()
コード例 #7
0
    def __init__(self):
        """
        Load the training datasets and assemble them into arrays.

        First checks that every file named in TrainingData.FNAME_DICT
        exists, then reads the TECRDB, formation and redox datasets through
        the corresponding TrainingData.read_*() calls, concatenates the
        resulting records and stores:

            S       - zero-initialised matrix with one row per compound ID
                      (sorted) and one column per record, filled with the
                      coefficients of each record's 'reaction'
            cids    - the sorted compound IDs labelling the rows of S
            cids_that_dont_decompose - second value returned by
                      read_formations()
            dG0_prime, T, I, pH, pMg, weight - arrays holding the "dG'0",
                      'T', 'I', 'pH', 'pMg' and 'weight' field of every
                      record

        Finally calls balance_reactions() with the indices of the records
        whose 'balance' field is truthy, then reverse_transform().

        Raises:
            Exception - ('file not found: <name>') when a dataset file is
                        missing
        """
        self.ccache = CompoundCacher.getInstance()

        # verify that the files exist
        for fname, _ in TrainingData.FNAME_DICT.values():
            if not os.path.exists(fname):
                raise Exception('file not found: ' + fname)

        tecrdb_params = TrainingData.read_tecrdb()

        formation_params, cids_that_dont_decompose = TrainingData.read_formations()

        redox_params = TrainingData.read_redox()

        thermo_params = tecrdb_params + formation_params + redox_params

        # collect every compound ID in place instead of rebuilding the set
        # on each iteration
        cids = set()
        for d in thermo_params:
            cids.update(d['reaction'].keys())
        cids = sorted(cids)

        # convert the list of reactions in sparse notation into a full
        # stoichiometric matrix, where the rows (compounds) are according to
        # the CID list 'cids'.
        # A dict gives constant-time row lookup; list.index() would rescan
        # 'cids' for every single coefficient.
        row_of_cid = {cid: row for row, cid in enumerate(cids)}
        self.S = np.zeros((len(cids), len(thermo_params)))
        for k, d in enumerate(thermo_params):
            # iteritems() is kept as-is: the reaction type is defined
            # elsewhere and may not expose items().
            for cid, coeff in d['reaction'].iteritems():
                self.S[row_of_cid[cid], k] = coeff

        self.cids = cids
        self.cids_that_dont_decompose = cids_that_dont_decompose

        self.dG0_prime = np.array([d['dG\'0'] for d in thermo_params])
        self.T = np.array([d['T'] for d in thermo_params])
        self.I = np.array([d['I'] for d in thermo_params])
        self.pH = np.array([d['pH'] for d in thermo_params])
        self.pMg = np.array([d['pMg'] for d in thermo_params])
        self.weight = np.array([d['weight'] for d in thermo_params])
        rxn_inds_to_balance = [i for i, d in enumerate(thermo_params)
                               if d['balance']]

        self.balance_reactions(rxn_inds_to_balance)

        self.reverse_transform()
コード例 #8
0
 def __init__(self, training_data):
     """
         Set up the group-decomposition helpers and copy the training
         arrays out of `training_data`.

         Reads training_data.cids, .S, .dG0 and .weight. dG0 and weight
         are wrapped in np.matrix and transposed before being stored as
         train_b and train_w. The attributes train_G, train_S_joined,
         model_S_joined and params are initialised to None.
     """
     self.ccache = CompoundCacher.getInstance()

     groups = init_groups_data()
     self.groups_data = groups
     self.inchi2gv = InChI2GroupVector(groups)
     self.group_names = groups.GetGroupNames()

     self.train_cids = training_data.cids
     self.train_S = training_data.S
     self.train_b = np.matrix(training_data.dG0).T
     self.train_w = np.matrix(training_data.weight).T

     # not computed here; start out empty
     self.train_G = self.train_S_joined = None
     self.model_S_joined = self.params = None
コード例 #9
0
    def is_balanced(self):
        """
        Check the reaction for element balance.

        The coefficients stored in self.sparse are multiplied with the
        transposed matrix returned by get_kegg_ematrix() for the same
        compound IDs.

        Returns:
            True  - when no entry of the product is non-zero, and also when
                    the product contains NaN (the check cannot be done; a
                    debug message is logged)
            False - when any entry of the product is non-zero; one debug
                    message is logged per non-zero row
        """
        compound_ids = list(self.keys())
        stoich = np.array([self.sparse[c] for c in compound_ids], ndmin=2).T

        cacher = CompoundCacher.getInstance()
        elements, Ematrix = cacher.get_kegg_ematrix(compound_ids)
        # NOTE(review): presumably Ematrix is an np.matrix, making '*' a
        # matrix product - confirm against get_kegg_ematrix()
        conserved = Ematrix.T * stoich

        if np.any(np.isnan(conserved), 0):
            logging.debug('cannot test reaction balancing because of unspecific '
                          'compound formulas: %s' % self.write_formula())
            return True

        if not np.any(conserved != 0, 0):
            return True

        logging.debug('unbalanced reaction: %s' % self.write_formula())
        nonzero_rows = np.where(conserved[:, 0])[0]
        for row in nonzero_rows.flat:
            surplus = conserved[row, 0]
            logging.debug('there are %d more %s atoms on the right-hand side' %
                          (surplus, elements[row]))
        return False
コード例 #10
0
    def is_balanced(self):
        """
        Check the reaction for element balance.

        The coefficients stored in self.sparse are multiplied with the
        transposed matrix returned by get_kegg_ematrix() for the same
        compound IDs.

        Returns:
            True  - when no entry of the product is non-zero, and also when
                    the product contains NaN (the check cannot be done; a
                    debug message is logged)
            False - when any entry of the product is non-zero; one debug
                    message is logged per non-zero row
        """
        compound_ids = list(self.keys())
        stoich = np.array([self.sparse[c] for c in compound_ids], ndmin=2).T

        cacher = CompoundCacher.getInstance()
        elements, Ematrix = cacher.get_kegg_ematrix(compound_ids)
        # NOTE(review): presumably Ematrix is an np.matrix, making '*' a
        # matrix product - confirm against get_kegg_ematrix()
        conserved = Ematrix.T * stoich

        if np.any(np.isnan(conserved), 0):
            logging.debug(
                'cannot test reaction balancing because of unspecific '
                'compound formulas: %s' % self.write_formula())
            return True

        if not np.any(conserved != 0, 0):
            return True

        logging.debug('unbalanced reaction: %s' % self.write_formula())
        nonzero_rows = np.where(conserved[:, 0])[0]
        for row in nonzero_rows.flat:
            surplus = conserved[row, 0]
            logging.debug(
                'there are %d more %s atoms on the right-hand side' %
                (surplus, elements[row]))
        return False
コード例 #11
0
 def __init__(self, S, cids):
     """
     Keep a stoichiometric matrix together with its row labels.

     Arguments:
         S    - object exposing a ``shape`` attribute
         cids - sized sequence; its length must equal S.shape[0]

     Raises:
         AssertionError - when len(cids) differs from S.shape[0]
     """
     self.S, self.cids = S, cids

     # one label per row of S
     assert len(self.cids) == self.S.shape[0]

     # fetched last, so a failed size check never touches the cache
     self.ccache = CompoundCacher.getInstance()
コード例 #12
0
import sys, logging
sys.path.append('../python')
from compound import Compound
from inchi2gv import init_groups_data, InChI2GroupVector, GroupDecompositionError
from compound_cacher import CompoundCacher
from molecule import Molecule

#logger = logging.getLogger('')
#logger.setLevel(logging.DEBUG)
# Shared objects used by the loop below: the compound cache backed by a JSON
# file, and the group-decomposition converter.
ccache = CompoundCacher.getInstance('../cache/compounds.json')
groups_data = init_groups_data()
group_list = groups_data.GetGroupNames()
inchi2gv_converter = InChI2GroupVector(groups_data)

# SMARTS patterns; only "c~[n;+1]~c" is actually queried below.
patterns = ['c~[O;+0]', 'c~[O;+1]', 'c~[n;+1]~c', 'c~[n;+0]~c', 'c~[n;-1]~c']

# For each compound: print a header, its SMILES, the matches of one SMARTS
# pattern, and then either its group vector (stdout) or the decomposition
# error with its debug table (stderr).
for cid in (255, 1007):
    comp = ccache.get_kegg_compound(cid)

    # single-argument form reproduces the output of the two-item print
    # statement (items separated by one space)
    print('%s \nC%05d' % ("-" * 50, cid))

    inchi = comp.inchi
    mol = Molecule.FromInChI(inchi)
    print(mol.ToSmiles())

    print(mol.FindSmarts("c~[n;+1]~c"))

    try:
        groupvec = inchi2gv_converter.InChI2GroupVector(inchi)
        sys.stdout.write(str(groupvec) + '\n')
    except GroupDecompositionError as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write(e.GetDebugTable())
コード例 #13
0
 def __init__(self, S, cids):
     """
     Keep a stoichiometric matrix together with its row labels.

     Arguments:
         S    - object exposing a ``shape`` attribute
         cids - sized sequence; its length must equal S.shape[0]

     Raises:
         AssertionError - when len(cids) differs from S.shape[0]
     """
     self.S, self.cids = S, cids

     # one label per row of S
     assert len(self.cids) == self.S.shape[0]

     # fetched last, so a failed size check never touches the cache
     self.ccache = CompoundCacher.getInstance()