Ejemplo n.º 1
0
    def test_get_formula_and_connectivity(self):
        """ Check that :obj:`get_formula_and_connectivity` keeps only the formula and
        connectivity layers of an InChI string """
        # Molecule with hydrogen and stereochemistry layers: only the formula and
        # the ``/c`` layer are expected back.
        full_inchi = 'InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)/t2-,3-,4+,5-,6-/m1/s1'
        expected_core = 'C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8'
        molecule = molecule_util.InchiMolecule(full_inchi)
        self.assertEqual(molecule.get_formula_and_connectivity(), expected_core)

        # Molecule without a connectivity layer: only the formula is expected back.
        molecule = molecule_util.InchiMolecule('InChI=1S/H2O/h1H2')
        self.assertEqual(molecule.get_formula_and_connectivity(), 'H2O')
Ejemplo n.º 2
0
 def test_is_protonation_isomer(self):
     """ Check :obj:`is_protonation_isomer` for a molecule compared with itself and
     with a molecule that has a different connectivity layer """
     # The first two molecules share formula and connectivity and differ only in
     # their ``/h`` layer; the third has a different ``/c`` layer.
     first = molecule_util.InchiMolecule(
         'InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2')
     second = molecule_util.InchiMolecule(
         'InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h3')
     different_connectivity = molecule_util.InchiMolecule(
         'InChI=1S/C6H13O9P/c7/h4')

     # every molecule is a protonation isomer of itself
     for molecule in (first, second):
         self.assertTrue(molecule.is_protonation_isomer(molecule))

     self.assertFalse(first.is_protonation_isomer(different_connectivity))
Ejemplo n.º 3
0
 def test_is_tautomer(self):
     """ Check :obj:`is_tautomer` for molecules compared with themselves and with a
     molecule that has a different connectivity layer """
     # ``a`` and ``b`` differ only in their ``/t`` layer; ``c`` has a different
     # ``/c`` layer.
     a = molecule_util.InchiMolecule(
         'InChI=1S/C5H5N5O/c6-5-9-3-2(4(11)10-5)7-1-8-3/h1H,(H4,6,7,8,9,10,11)/t1'
     )
     b = molecule_util.InchiMolecule(
         'InChI=1S/C5H5N5O/c6-5-9-3-2(4(11)10-5)7-1-8-3/h1H,(H4,6,7,8,9,10,11)/t2'
     )
     c = molecule_util.InchiMolecule(
         'InChI=1S/C5H5N5O/c6/h1H,(H4,6,7,8,9,10,11)/t2')

     # Each molecule must be a tautomer of itself. The second assertion used to
     # repeat the check on ``a``, which left ``b`` unused.
     self.assertTrue(a.is_tautomer(a))
     self.assertTrue(b.is_tautomer(b))
     self.assertFalse(a.is_tautomer(c))
Ejemplo n.º 4
0
    def test_is_stereoisomer(self):
        """ Check :obj:`is_stereoisomer` against molecules that differ only in their
        stereochemistry layer and against one with a different connectivity layer """
        reference = molecule_util.InchiMolecule(
            'InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)/t2-,3-,4+,5-,6-/m1/s1'
        )
        # same layers as the reference except for ``/t``
        stereo_variant = molecule_util.InchiMolecule(
            'InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)/t2-,3+,4+,5-,6?/m1/s1'
        )
        # same formula as the reference, different ``/c`` layer
        different_connectivity = molecule_util.InchiMolecule(
            'InChI=1S/C6H13O9P/c7-1-3(8)5(10)6(11)4(9)2-15-16(12,13)14/h4-7,9-11H,1-2H2,(H2,12,13,14)/t4-,5-,6-/m1/s1'
        )

        for candidate in (reference, stereo_variant):
            self.assertTrue(reference.is_stereoisomer(candidate))
        self.assertFalse(reference.is_stereoisomer(different_connectivity))
Ejemplo n.º 5
0
    def test_is_equal(self):
        """ Check :obj:`is_equal` with its default arguments and with
        :obj:`check_stereochemistry` disabled """
        # The two molecules differ only by the ``/p-1`` layer.
        with_proton_layer = molecule_util.InchiMolecule('InChI=1S/BrH/h1H/p-1')
        without_proton_layer = molecule_util.InchiMolecule('InChI=1S/BrH/h1H')
        self.assertTrue(with_proton_layer.is_equal(with_proton_layer))
        self.assertFalse(with_proton_layer.is_equal(without_proton_layer))

        reference = molecule_util.InchiMolecule(
            'InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)/t2-,3-,4+,5-,6-/m1/s1'
        )
        # same layers as the reference except for ``/t``
        stereo_variant = molecule_util.InchiMolecule(
            'InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)/t2-,3+,4+,5-,6?/m1/s1'
        )
        # same formula as the reference, different ``/c`` layer
        different_connectivity = molecule_util.InchiMolecule(
            'InChI=1S/C6H13O9P/c7-1-3(8)5(10)6(11)4(9)2-15-16(12,13)14/h4-7,9-11H,1-2H2,(H2,12,13,14)/t4-,5-,6-/m1/s1'
        )

        # With stereochemistry ignored, only the connectivity difference matters.
        for candidate in (reference, stereo_variant):
            self.assertTrue(
                reference.is_equal(candidate, check_stereochemistry=False))
        self.assertFalse(
            reference.is_equal(different_connectivity,
                               check_stereochemistry=False))
Ejemplo n.º 6
0
    def test(self):
        """ Check that :obj:`InchiMolecule` splits InChI strings into the expected
        layer attributes and that :obj:`str` reassembles the InChI string """
        layer_names = (
            'formula',
            'connections',
            'hydrogens',
            'protons',
            'charge',
            'double_bonds',
            'stereochemistry',
            'stereochemistry_parity',
            'stereochemistry_type',
            'isotopes',
            'fixed_hydrogens',
            'reconnected_metals',
        )

        # (InChI string, layers expected to be non-empty); every other layer
        # attribute is expected to be the empty string
        parsing_cases = (
            ('InChI=1S/C3H4O3/c1-2(4)3(5)6/h1H3,(H,5,6)', {
                'formula': 'C3H4O3',
                'connections': '1-2(4)3(5)6',
                'hydrogens': '1H3,(H,5,6)',
            }),
            ('InChI=1S/C3H4O3/c1-2(4)3(5)6', {
                'formula': 'C3H4O3',
                'connections': '1-2(4)3(5)6',
            }),
        )
        for inchi, populated_layers in parsing_cases:
            expected_layers = dict.fromkeys(layer_names, '')
            expected_layers.update(populated_layers)

            layers = molecule_util.InchiMolecule(inchi)
            self.assertEqual(layers.__dict__, expected_layers)
            self.assertEqual(str(layers), inchi)

        # strings that must survive a parse/serialize round trip unchanged
        round_trip_inchis = (
            'InChI=1S/Ni/q+2',
            'InChI=1S/BrH/h1H/p-1',
            'InChI=1S/p+1',
            'InChI=1S/4O.V/q;3*-1;',
        )
        for inchi in round_trip_inchis:
            self.assertEqual(str(molecule_util.InchiMolecule(inchi)), inchi)

        # an ``InChI=1/`` prefix is expected to be serialized as ``InChI=1S/``
        non_standard = 'InChI=1/C6H12O6/c7-1-3-4(9)5(10)6(11,2-8)12-3/h3-5,7-11H,1-2H2/t3-,4-,5+,6u/m0/s1'
        standard = 'InChI=1S/C6H12O6/c7-1-3-4(9)5(10)6(11,2-8)12-3/h3-5,7-11H,1-2H2/t3-,4-,5+,6u/m0/s1'
        self.assertEqual(str(molecule_util.InchiMolecule(non_standard)), standard)
Ejemplo n.º 7
0
    def to_inchi(self, only_formula_and_connectivity=False):
        """ Convert the structure to InChI format

        Args:
            only_formula_and_connectivity (:obj:`bool`): if :obj:`True`, reduce the
                InChI string to its formula and connectivity layers

        Returns:
            :obj:`str`: the InChI representation of :obj:`self.structure`, or only its
                formula and connectivity layers if :obj:`only_formula_and_connectivity`
                is :obj:`True`
        """
        full_inchi = molecule_util.Molecule(structure=self.structure).to_inchi()
        if not only_formula_and_connectivity:
            return full_inchi

        layers = molecule_util.InchiMolecule(full_inchi)
        return layers.get_formula_and_connectivity()
Ejemplo n.º 8
0
    def get_concentration_by_structure(self, inchi, only_formula_and_connectivity=True, select = models.Concentration):
        """ Get the concentrations of metabolites with a matching structure

        Args:
            inchi (:obj:`str`): structure, in InChI format, to find concentrations for
            only_formula_and_connectivity (:obj:`bool`, optional): if :obj:`True`, match
                on the formula and connectivity layers of :obj:`inchi`; if :obj:`False`,
                match on the complete InChI string
            select (optional): entity to query; it is joined to
                :obj:`models.Metabolite` through its :obj:`metabolite` attribute

        Returns:
            :obj:`list`: results of the query for :obj:`select`
        """
        # select -> metabolite -> structure
        query = self.data_source.session.query(select)
        query = query.join((models.Metabolite, select.metabolite))
        query = query.join((models.Structure, models.Metabolite.structure))

        if only_formula_and_connectivity:
            core_layers = molecule_util.InchiMolecule(inchi)
            core_layers = core_layers.get_formula_and_connectivity()
            criterion = models.Structure._structure_formula_connectivity == core_layers
        else:
            criterion = models.Structure._value_inchi == inchi

        matches = query.filter(criterion)
        return matches.all()
Ejemplo n.º 9
0
    def get_metabolites_by_structure(self, inchi, only_formula_and_connectivity=False, select=models.Metabolite):
        """ Build a query for metabolites with a matching structure. The match is either
        on the complete InChI string or only on the core empirical formula and core atom
        connectivity (i.e. the InChI formula and connectivity layers).

        Args:
            inchi (:obj:`str`): molecule structure in InChI format
            only_formula_and_connectivity (:obj:`bool`, optional): if :obj:`True`, match
                metabolites with the same formula and connectivity layers; if
                :obj:`False`, match metabolites with the identical InChI string
            select (optional): entity to query; it is joined to
                :obj:`models.Structure` through :obj:`models.Metabolite.structure`

        Returns:
            :obj:`sqlalchemy.orm.query.Query`: query for matching metabolites
        """
        query = self.data_source.session.query(select)
        query = query.join((models.Structure, models.Metabolite.structure))

        if not only_formula_and_connectivity:
            return query.filter(models.Structure._value_inchi == inchi)

        core_layers = molecule_util.InchiMolecule(inchi).get_formula_and_connectivity()
        return query.filter(
            models.Structure._structure_formula_connectivity == core_layers)
Ejemplo n.º 10
0
    def load_content(self):
        """ Download the content of ECMDB and store it to a local sqlite database.

        The zipped compound index is downloaded and parsed, its entries are sorted by
        ``m2m_id`` and truncated to :obj:`self.max_entries`, and then the details of
        each remaining compound are downloaded, parsed, and added to the database
        session. A compound whose details cannot be downloaded is skipped with a
        :obj:`data_source.DataSourceWarning`.

        Raises:
            :obj:`requests.exceptions.HTTPError`: if the request for the compound index
                or for a fallback compound structure fails
            :obj:`ValueError`: if a concentration is not reported in ``uM`` or a
                temperature is not reported in ``oC``
        """
        db_session = self.session
        req_session = self.requests_session

        # (key in the compound details, resource namespace, identifier prefix or None)
        cross_reference_sources = (
            ('biocyc_id', 'biocyc', None),
            ('cas_registry_number', 'cas', None),
            ('chebi_id', 'chebi', 'CHEBI:'),
            ('chemspider_id', 'chemspider', None),
            ('foodb_id', 'foodb.compound', None),
            ('het_id', 'ligandexpo', None),
            ('hmdb_id', 'hmdb', None),
            ('kegg_id', 'kegg.compound', None),
            ('msds_url', 'msds.url', None),
            ('pubchem_compound_id', 'pubchem.compound', None),
            # NOTE(review): 'wikipidia' is the key this method has always read;
            # confirm against the ECMDB XML before "correcting" the spelling
            ('wikipidia', 'wikipedia.en', None),
        )

        # download content from server
        if self.verbose:
            print('Downloading compound IDs ...')

        response = req_session.get(self.DOWNLOAD_INDEX_URL)
        response.raise_for_status()

        if self.verbose:
            print('  done')

        # unzip and parse content
        if self.verbose:
            print('Parsing compound IDs ...')

        with zipfile.ZipFile(io.BytesIO(response.content), 'r') as zip_file:
            with zip_file.open('ecmdb.json', 'r') as json_file:
                entries = json.load(json_file)

        if self.verbose:
            print('  found {} compounds'.format(len(entries)))

        # sort entries so that the same compounds are processed on every run
        entries.sort(key=lambda e: e['m2m_id'])

        # limit number of processed entries
        if len(entries) > self.max_entries:
            entries = entries[0:self.max_entries]

        # load content into sqlite database
        if self.verbose:
            print('Downloading {} compounds ...'.format(len(entries)))

        xml_parser = jxmlease.Parser()
        for i_entry, entry in enumerate(entries):
            if self.verbose and (i_entry % 10 == 0):
                print('  Downloading compound {} of {}'.format(
                    i_entry + 1, len(entries)))

            # get details; a compound that cannot be downloaded is skipped
            response = req_session.get(
                self.DOWNLOAD_COMPOUND_URL.format(entry['m2m_id']))
            try:
                response.raise_for_status()
            except requests.exceptions.HTTPError:
                warnings.warn(
                    'Unable to download data for compound {}'.format(
                        entry['m2m_id']), data_source.DataSourceWarning)
                continue

            entry_details = xml_parser(response.text)['compound']

            compound = self.get_or_create_object(
                Compound, id=self.get_node_text(entry_details['m2m_id']))

            if 'name' in entry_details:
                compound.name = self.get_node_text(entry_details['name'])

            if 'description' in entry_details:
                compound.description = self.get_node_text(
                    entry_details['description'])

            # fall back to the dedicated structure URL when the details
            # do not include an InChI string
            compound.structure = self.get_node_text(entry_details['inchi'])
            if not compound.structure:
                structure_response = req_session.get(
                    self.DOWNLOAD_COMPOUND_STRUCTURE_URL.format(
                        entry['m2m_id']))
                structure_response.raise_for_status()
                compound.structure = structure_response.text

            compound.comment = entry['comment']

            # timestamps are stored without timezone information
            compound.created = dateutil.parser.parse(
                self.get_node_text(
                    entry_details['creation_date'])).replace(tzinfo=None)
            compound.updated = dateutil.parser.parse(
                self.get_node_text(
                    entry_details['update_date'])).replace(tzinfo=None)

            # calculate core InChI layers to facilitate searching
            try:
                compound._structure_formula_connectivity = molecule_util.InchiMolecule(compound.structure) \
                    .get_formula_and_connectivity()
            except ValueError:
                warnings.warn(
                    'Unable to encode structure for {} in InChI'.format(
                        entry['m2m_id']), data_source.DataSourceWarning)
                compound._structure_formula_connectivity = None

            # synonyms: the optional IUPAC names first, then the synonym list
            compound.synonyms = []

            for name_key in ('iupac_name', 'traditional_iupac'):
                if name_key in entry_details:
                    name = self.get_node_text(entry_details[name_key])
                    compound.synonyms.append(
                        self.get_or_create_object(Synonym, name=name))

            parent_node = entry_details['synonyms']
            if 'synonym' in parent_node:
                for node in self.get_node_children(parent_node, 'synonym'):
                    name = self.get_node_text(node)
                    compound.synonyms.append(
                        self.get_or_create_object(Synonym, name=name))

            # locations
            compound.compartments = []
            parent_node = entry_details['cellular_locations']
            if 'cellular_location' in parent_node:
                for node in self.get_node_children(parent_node,
                                                   'cellular_location'):
                    name = self.get_node_text(node)
                    compound.compartments.append(
                        self.get_or_create_object(Compartment, name=name))

            # todo (enhancement): parse experimental properties
            # * state
            # * melting_point
            # * water_solubility
            # * logp_hydrophobicity

            # concentrations: the attributes of the i-th concentration are
            # read from the i-th child of each kind under the parent node
            compound.concentrations = []
            parent_node = entry_details['concentrations']
            if 'concentration' in parent_node:
                values = self.get_node_children(parent_node, 'concentration')
                errors = self.get_node_children(parent_node, 'error')
                units = self.get_node_children(parent_node,
                                               'concentration_units')
                strains = self.get_node_children(parent_node, 'strain')
                statuses = self.get_node_children(parent_node, 'growth_status')
                medias = self.get_node_children(parent_node, 'growth_media')
                temperatures = self.get_node_children(parent_node,
                                                      'temperature')
                systems = self.get_node_children(parent_node, 'growth_system')
                references = self.get_node_children(parent_node, 'reference')

                for i_conc, value_node in enumerate(values):
                    value = float(self.get_node_text(value_node))
                    # a missing error is recorded as NaN
                    error = float(self.get_node_text(errors[i_conc]) or 'nan')
                    unit = self.get_node_text(units[i_conc])
                    if unit != 'uM':
                        raise ValueError('Unsupport units: {}'.format(unit))

                    temperature_node = temperatures[i_conc]
                    if temperature_node:
                        # the text is split into "<value> <unit>"
                        temperature, temperature_unit = self.get_node_text(
                            temperature_node).split(' ')
                        temperature = float(temperature)
                        if temperature_unit != 'oC':
                            raise ValueError(
                                'Unsupport units: {}'.format(temperature_unit))
                    else:
                        temperature = None

                    concentration = Concentration(
                        value=value,
                        error=error,
                        strain=self.get_node_text(strains[i_conc]) or None,
                        growth_status=self.get_node_text(statuses[i_conc])
                        or None,
                        media=self.get_node_text(medias[i_conc]) or None,
                        temperature=temperature,
                        growth_system=self.get_node_text(systems[i_conc])
                        or None,
                    )
                    db_session.add(concentration)

                    reference_node = references[i_conc]
                    if 'pubmed_id' in reference_node:
                        for node in self.get_node_children(reference_node,
                                                           'pubmed_id'):
                            pubmed_id = self.get_node_text(node)
                            concentration.references.append(
                                self.get_or_create_object(Resource,
                                                          namespace='pubmed',
                                                          id=pubmed_id))

                    compound.concentrations.append(concentration)

            # cross references: one resource per non-empty identifier
            compound.cross_references = []
            for details_key, namespace, prefix in cross_reference_sources:
                resource_id = self.get_node_text(entry_details[details_key])
                if not resource_id:
                    continue
                if prefix:
                    resource_id = prefix + resource_id
                compound.cross_references.append(
                    self.get_or_create_object(Resource,
                                              namespace=namespace,
                                              id=resource_id))

            # add to session
            db_session.add(compound)

            # commit after every 100th compound when requested
            if self.commit_intermediate_results and (i_entry % 100 == 99):
                db_session.commit()

        if self.verbose:
            print('  done')

        # commit changes to database
        if self.verbose:
            print('Saving database ...')

        db_session.commit()

        if self.verbose:
            print('  done')
Ejemplo n.º 11
0
 def test_remove_layer(self):
     """ Check that removing the ``protons`` layer drops ``/p-1`` from the
     serialized InChI string """
     molecule = molecule_util.InchiMolecule('InChI=1S/BrH/h1H/p-1')
     molecule.remove_layer('protons')
     serialized = str(molecule)
     self.assertEqual(serialized, 'InChI=1S/BrH/h1H')