Ejemplo n.º 1
0
    def verify_large_polymer(self, form, alphabet, i_trial):
        """ Build a polymer by repeating a sequence and report the time and memory used by each stage

        The stages are (1) parsing plus length, formula and charge, (2) structure
        generation, (3) major microspecies, (4) major tautomer and (5) SMILES export.
        The more expensive stages are skipped for longer polymers (see the length
        thresholds below). A summary is printed to standard output.

        Args:
            form: form instance; its `from_str`, `get_formula`, `get_charge`,
                `get_structure` and `get_major_micro_species` methods are used
            alphabet (:obj:`str`): sequence that is repeated `2 ** i_trial` times;
                the length assertion assumes that it encodes 4 monomeric forms
            i_trial (:obj:`int`): exponent that controls the number of repeats
        """
        start = time.time()
        process = psutil.Process(os.getpid())
        uss0 = process.memory_full_info().uss

        # stage 1: parse the sequence and compute the cheap properties
        n_repeats = pow(2, i_trial)
        form.from_str(alphabet * n_repeats)
        length = 4 * n_repeats
        self.assertEqual(len(form), length)
        formula = form.get_formula()
        charge = form.get_charge()
        end1 = time.time()
        uss1 = process.memory_full_info().uss

        # stage 2: generate the structure and cross-check it (short polymers only)
        if length <= 50:
            structure = form.get_structure()[0]
            self.assertEqual(OpenBabelUtils.get_formula(structure), formula)
            self.assertEqual(structure.GetTotalCharge(), charge)
        else:
            structure = None
        end2 = time.time()
        uss2 = process.memory_full_info().uss

        # stage 3: major microspecies without tautomerization
        if length <= 20:
            form.get_major_micro_species(7.4, major_tautomer=False)
        end3 = time.time()
        uss3 = process.memory_full_info().uss

        # stage 4: major microspecies with tautomerization
        if length <= 5:
            form.get_major_micro_species(7.4, major_tautomer=True)
        end4 = time.time()
        uss4 = process.memory_full_info().uss

        # stage 5: SMILES export of the structure generated in stage 2
        if structure is not None:
            OpenBabelUtils.export(structure, 'smiles')
        end5 = time.time()
        uss5 = process.memory_full_info().uss

        # The headline figure covers all five stages (start to end5) so that it
        # equals the sum of the per-stage times listed beneath it.
        print(('Calculating polymer of length {} took {:.3f} s'
               '\n  Parsing, length, formula, charge: {:.3f} s, {}'
               '\n  Structure: {:.3f} s, {}'
               '\n  Major microspecies: {:.3f} s, {}'
               '\n  Major tautomer: {:.3f} s, {}'
               '\n  Canonical SMILES: {:.3f} s, {}'
               ).format(
            length, end5 - start,
            end1 - start, hurry.filesize.size(uss1 - uss0),
            end2 - end1, hurry.filesize.size(uss2 - uss1),
            end3 - end2, hurry.filesize.size(uss3 - uss2),
            end4 - end3, hurry.filesize.size(uss4 - uss3),
            end5 - end4, hurry.filesize.size(uss5 - uss4),
        ))
Ejemplo n.º 2
0
    def structure_to_smiles_and_props(self, structure, ph):
        """ Get the SMILES representation of a structure at a given pH together
        with its empirical formula, charge and molecular weight

        Args:
            structure (:obj:`str`): InChI or SMILES string; treated as InChI when
                it contains ``InChI=`` and as SMILES otherwise
            ph (:obj:`float`): pH at which the properties should be determined

        Returns:
            :obj:`str`: SMILES string
            :obj:`wc_utils.util.chem.core.EmpiricalFormula`: empirical formula
            :obj:`int`: charge
            :obj:`float`: molecular weight
        """
        if 'InChI=' in structure:
            input_format = 'inchi'
        else:
            input_format = 'smiles'
        smiles = get_major_micro_species(structure, input_format, 'smiles', ph=ph)

        # load the pH-adjusted SMILES into an Open Babel molecule
        converter = openbabel.OBConversion()
        converter.SetInFormat('smi')
        converter.SetOptions('c', converter.OUTOPTIONS)
        ob_mol = openbabel.OBMol()
        converter.ReadString(ob_mol, smiles)

        # the molecular weight is derived from the formula rather than the molecule
        formula = OpenBabelUtils.get_formula(ob_mol)
        return smiles, formula, ob_mol.GetTotalCharge(), formula.get_molecular_weight()
Ejemplo n.º 3
0
    def _default(self):
        """ Parse and validate a form, then print its length, structure (SMILES),
        formula, molecular weight and charge

        When no pH is given the properties come from the form itself; otherwise
        they are derived from the major microspecies at that pH.

        Raises:
            :obj:`SystemExit`: if the sequence cannot be parsed or fails validation
        """
        args = self.app.pargs
        form_cls = bpforms.util.get_form(args.alphabet)

        try:
            form = form_cls().from_str(args.seq)
        except Exception as error:
            raise SystemExit('Form is invalid: {}'.format(str(error)))
        form.circular = args.circular

        validation_errors = form.validate()
        if validation_errors:
            raise SystemExit('Form is invalid:\n  {}'.format(
                '\n  '.join(validation_errors)))

        smiles = formula = mol_wt = charge = None
        try:
            if args.ph is not None:
                structure = form.get_major_micro_species(
                    args.ph,
                    major_tautomer=args.major_tautomer,
                    dearomatize=args.dearomatize)
                if structure is not None:
                    formula = OpenBabelUtils.get_formula(structure)
                    mol_wt = formula.get_molecular_weight()
                    charge = structure.GetTotalCharge()
            else:
                formula = form.get_formula()
                mol_wt = form.get_mol_wt()
                charge = form.get_charge()
                structure = form.get_structure()[0]

            if structure is not None:
                smiles = OpenBabelUtils.export(structure, 'smiles')
        except Exception:
            # best effort: any property that could not be computed is printed
            # as ``None`` below
            pass

        print('Length: {}'.format(len(form)))
        print('Structure: {}'.format(smiles))
        print('Formula: {}'.format(formula))
        print('Molecular weight: {}'.format(mol_wt))
        print('Charge: {}'.format(charge))
Ejemplo n.º 4
0
    def test_ProteinAlphabetBuilder_get_resid_monomer_structure(self):
        """ Check the structure and the N and C atom indices that
        `ProteinAlphabetBuilder.get_resid_monomer_structure` returns for
        entry ``AA0005`` at pH 7.4 with the major tautomer
        """
        structure, index_n, index_c = protein.ProteinAlphabetBuilder().get_resid_monomer_structure(
            'AA0005', self.tmp_pdbfile, ph=7.4, major_tautomer=True)

        # just in case check that original structure has not been modified
        self.assertEqual(OpenBabelUtils.export(structure, 'smiles'),
                         'OC(=O)[C@@H]([NH3+])CS')

        # check if correct index for N and C atoms
        self.assertEqual(index_n, 6)
        self.assertEqual(index_c, 2)
Ejemplo n.º 5
0
    def _default(self):
        """ Parse and validate a form, then print the SMILES representation of
        its major microspecies at the requested pH

        Raises:
            :obj:`SystemExit`: if the sequence cannot be parsed or fails validation
        """
        args = self.app.pargs
        form_cls = bpforms.util.get_form(args.alphabet)

        try:
            form = form_cls().from_str(args.seq)
        except Exception as error:
            raise SystemExit('Form is invalid: {}'.format(str(error)))
        form.circular = args.circular

        validation_errors = form.validate()
        if validation_errors:
            raise SystemExit('Form is invalid:\n  {}'.format(
                '\n  '.join(validation_errors)))

        micro_species = form.get_major_micro_species(
            args.ph,
            major_tautomer=args.major_tautomer,
            dearomatize=args.dearomatize)
        smiles = OpenBabelUtils.export(micro_species, 'smiles')
        print(smiles)
Ejemplo n.º 6
0
    def post(self):
        """ Optionally, calculate the major protonation and tautomerization form a biopolymer form and calculate its properties """
        # NOTE(review): the docstring above is kept verbatim and separate from the
        # developer notes below; presumably flask-restplus surfaces it in the
        # generated API documentation -- confirm before editing it.
        #
        # Returns:
        #     :obj:`dict`: alphabet, canonical sequence, length, SMILES structure,
        #         formula, molecular weight, charge and any warning text

        payload = bpform_ns.payload
        alphabet = payload['alphabet']
        seq = payload['seq']
        circular = payload.get('circular', False)
        ph = payload.get('ph', float('NaN'))
        major_tautomer = payload.get('major_tautomer', False)
        dearomatize = payload.get('dearomatize', False)

        form_cls = bpforms.util.get_form(alphabet)

        try:
            form = form_cls().from_str(seq)
        except Exception as error:
            flask_restplus.abort(400,
                                 'Unable to parse sequence of monomeric forms',
                                 errors={'seq': str(error)})
        form.circular = circular

        validation_errors = form.validate()
        if validation_errors:
            flask_restplus.abort(400,
                                 'Form is invalid',
                                 errors={'seq': '. '.join(validation_errors)})

        smiles = formula = mol_wt = charge = None
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter('once', bpforms.BpFormsWarning)

            try:
                if math.isnan(ph):
                    # no pH requested: report the properties of the form as written
                    formula = dict(form.get_formula())
                    mol_wt = form.get_mol_wt()
                    charge = form.get_charge()

                    max_len = config['max_len_get_structure']
                    if len(form.seq) > max_len:
                        structure = None
                        warnings.warn(
                            'Structure calculations are limited to forms with length <= {}'
                            .format(max_len),
                            bpforms.BpFormsWarning)
                    else:
                        structure = form.get_structure()[0]

                else:
                    # pH requested: report the properties of the major microspecies,
                    # unless the form exceeds the configured length limits
                    n_monomers = len(form.seq)
                    structure = None
                    if major_tautomer and n_monomers > config[
                            'max_len_get_major_micro_species_major_tautomer']:
                        warnings.warn(
                            'Major tautomer calculations are limited to forms with length <= {}'
                            .format(config[
                                'max_len_get_major_micro_species_major_tautomer']),
                            bpforms.BpFormsWarning)
                    elif n_monomers > config['max_len_get_major_micro_species']:
                        warnings.warn(
                            'Major microspecies calculations are limited to forms with length <= {}'
                            .format(config['max_len_get_major_micro_species']),
                            bpforms.BpFormsWarning)
                    else:
                        structure = form.get_major_micro_species(
                            ph,
                            major_tautomer=major_tautomer,
                            dearomatize=dearomatize)

                    if structure is not None:
                        formula = OpenBabelUtils.get_formula(structure)
                        mol_wt = formula.get_molecular_weight()
                        formula = dict(formula)
                        charge = structure.GetTotalCharge()

                if structure is not None:
                    smiles = OpenBabelUtils.export(structure, 'smiles')
            except Exception:
                # best effort: properties that could not be computed stay ``None``
                pass

            warning_message = (
                ' '.join(str(entry.message) for entry in caught)
                if caught else None)

        return {
            'alphabet': alphabet,
            'seq': str(form),
            'length': len(form),
            'structure': smiles,
            'formula': formula,
            'mol_wt': mol_wt,
            'charge': charge,
            'warnings': warning_message,
        }
Ejemplo n.º 7
0
def validate_bpform_bonds(form_type):
    """ Validate bonds in alphabet

    Three groups of checks are run and all problems are collected before
    reporting:

    1. Atoms referenced by the backbone and inter-monomer bond metadata that
       belong to the backbone must have a position within the backbone
       structure (1 to its number of atoms) and the declared element.
    2. Atoms referenced by each monomeric form's bond metadata that belong to
       the monomer must have a position within the monomer structure and the
       declared element.
    3. For each monomeric form, the formula and charge computed by a
       single-monomer form (and by a dimer, when the monomer can bond on both
       sides) must match those of the generated structure, and the structure
       must export to SMILES and InChI.

    Args:
        form_type (:obj:`type`): type of BpForm

    Raises:
        :obj:`ValueError`: if any of the bonds are invalid
    """

    # instance used to access the backbone, bond and alphabet of the form type
    form = form_type()

    # used to translate atomic numbers into element symbols
    element_table = openbabel.OBElementTable()

    # messages accumulated across all checks; reported together at the end
    errors = []

    # validate bonds to backbone
    # each entry is (attribute of the form, attribute of that object that lists atoms)
    atom_types = [
        ['backbone', 'monomer_bond_atoms'],
        ['backbone', 'monomer_displaced_atoms'],
        ['bond', 'l_bond_atoms'],
        ['bond', 'r_bond_atoms'],
        ['bond', 'l_displaced_atoms'],
        ['bond', 'r_displaced_atoms'],
    ]
    for molecule_md, atom_type in atom_types:
        molecule = getattr(form, molecule_md)
        # reset for each atom list
        # NOTE(review): presumably tracks hydrogens already handed out by
        # `core.get_hydrogen_atom` -- confirm against that helper
        selected_hydrogens = []
        for atom_md in getattr(molecule, atom_type):
            # only atoms that belong to the backbone are checked here
            if atom_md.molecule == core.Backbone:
                # a form without a backbone structure has no valid positions
                if form.backbone.structure:
                    n_backbone_atoms = form.backbone.structure.NumAtoms()
                else:
                    n_backbone_atoms = 0
                # valid positions run from 1 to the number of atoms
                if atom_md.position < 1 or atom_md.position > n_backbone_atoms:
                    errors.append('Invalid position {} for {}.{}'.format(
                        atom_md.position, molecule_md, atom_type))
                    continue

                atom = form.backbone.structure.GetAtom(atom_md.position)
                # a hydrogen declared at the position of a non-hydrogen atom is
                # resolved through `core.get_hydrogen_atom`; when that returns
                # `None` the element check is skipped without recording an error
                if atom_md.element == 'H' and atom.GetAtomicNum() != 1:
                    atom = core.get_hydrogen_atom(atom, selected_hydrogens,
                                                  None)
                    if atom is None:
                        continue

                if element_table.GetSymbol(
                        atom.GetAtomicNum()) != atom_md.element:
                    errors.append(
                        'Invalid element {} != {} at position {} for {}.{}'.
                        format(element_table.GetSymbol(atom.GetAtomicNum()),
                               atom_md.element, atom_md.position, molecule_md,
                               atom_type))

    # validate bonds to monomer
    # names of the monomer attributes that list atoms
    atom_types = [
        'backbone_bond_atoms',
        'backbone_displaced_atoms',
        'r_bond_atoms',
        'l_bond_atoms',
        'r_displaced_atoms',
        'l_displaced_atoms',
    ]
    for i_monomer, monomer in enumerate(form.alphabet.monomers.values()):
        for atom_type in atom_types:
            # reset for each atom list of each monomer
            selected_hydrogens = []
            for atom_md in getattr(monomer, atom_type):
                # only atoms that belong to the monomer are checked here
                if atom_md.molecule == core.Monomer:
                    # valid positions run from 1 to the number of atoms
                    if atom_md.position < 1 or atom_md.position > monomer.structure.NumAtoms(
                    ):
                        errors.append(
                            'Invalid position {} for monomeric form:{} {}'.
                            format(atom_md.position, monomer.id, atom_type))
                        continue

                    atom = monomer.structure.GetAtom(atom_md.position)
                    # same hydrogen handling as for the backbone; here the
                    # monomer's index is passed as the third argument
                    if atom_md.element == 'H' and atom.GetAtomicNum() != 1:
                        atom = core.get_hydrogen_atom(atom, selected_hydrogens,
                                                      i_monomer)
                        if atom is None:
                            continue

                    if element_table.GetSymbol(
                            atom.GetAtomicNum()) != atom_md.element:
                        errors.append(
                            'Invalid element {} != {} at position {} for monomeric form:{} {}'
                            .format(
                                element_table.GetSymbol(atom.GetAtomicNum()),
                                atom_md.element, atom_md.position, monomer.id,
                                atom_type))

    # validate monomeric forms and dimers
    for monomer in form.alphabet.monomers.values():
        monomer_form = form_type(seq=[monomer])
        try:
            monomer_structure = monomer_form.get_structure()[0]
            # NOTE: the `continue` statements below move on to the next monomer,
            # so the dimer check is skipped when the formula or charge of the
            # monomeric form is wrong
            if monomer_form.get_formula() != OpenBabelUtils.get_formula(
                    monomer_structure):
                errors.append(
                    'Monomeric form of {} has incorrect formula: {} != {}'.
                    format(monomer.id, str(monomer_form.get_formula()),
                           str(OpenBabelUtils.get_formula(monomer_structure))))
                continue
            if monomer_form.get_charge() != monomer_structure.GetTotalCharge():
                errors.append(
                    'Monomeric form of {} has incorrect charge: {} != {}'.
                    format(monomer.id, monomer_form.get_charge(),
                           monomer_structure.GetTotalCharge()))
                continue
            # results are discarded; the exports only need to complete without raising
            OpenBabelUtils.export(monomer_structure, 'smiles')
            OpenBabelUtils.export(monomer_structure, 'inchi')
        except Exception as error:
            # any failure is recorded; the dimer check below still runs
            errors.append(
                'Unable to create monomeric form of {}:\n    {}'.format(
                    monomer.id, str(error)))

        # a homodimer is only checked for monomers that can bond on both sides
        if form.can_monomer_bond_left(monomer) and form.can_monomer_bond_right(
                monomer):
            dimer_form = form_type(seq=[monomer, monomer])
            try:
                dimer_structure = dimer_form.get_structure()[0]
                if dimer_form.get_formula() != OpenBabelUtils.get_formula(
                        dimer_structure):
                    errors.append(
                        'Dimer of {} has incorrect formula: {} != {}'.format(
                            monomer.id, str(dimer_form.get_formula()),
                            str(OpenBabelUtils.get_formula(dimer_structure))))
                    continue
                if dimer_form.get_charge() != dimer_structure.GetTotalCharge():
                    errors.append(
                        'Dimer of {} has incorrect charge: {} != {}'.format(
                            monomer.id, dimer_form.get_charge(),
                            dimer_structure.GetTotalCharge()))
                    continue
                # results are discarded; the exports only need to complete without raising
                OpenBabelUtils.export(dimer_structure, 'smiles')
                OpenBabelUtils.export(dimer_structure, 'inchi')
            except Exception as error:
                errors.append('Unable to form dimer of {}:\n    {}'.format(
                    monomer.id, str(error)))

    # report errors
    if errors:
        raise ValueError('BpForm {} is invalid:\n  {}'.format(
            form_type.__name__, '\n  '.join(errors)))
Ejemplo n.º 8
0
 def test_get_structure(self):
     """ Check that a 500-character protein sequence yields an Open Babel
     molecule that can be exported to CML
     """
     sequence = 'ARCGY' * 100
     polymer = protein.ProteinForm().from_str(sequence)

     # `get_structure` returns a pair; only the first item is checked here
     mol, _ = polymer.get_structure()
     self.assertIsInstance(mol, openbabel.OBMol)

     exported = OpenBabelUtils.export(mol, 'cml')
     self.assertTrue(exported.startswith('<molecule'))