def testB1BondGenMol(self):
        """Check that the RDKit fragment count agrees with the adjacency-matrix analysis.

        Runs a seeded KMC simulation starting from five monomers (S, S, S, G, S),
        builds a mol block from the resulting adjacency matrix and monomer list,
        and verifies that:

        * ``generate_mol`` writes nothing to stderr, and
        * the number of fragments RDKit finds equals the sum of the chain-length
          counts reported by ``analyze_adj_matrix``.
        """
        seed_types = [S, S, S, G, S]
        monomers = [Monomer(kind, idx) for idx, kind in enumerate(seed_types)]

        # The GROW event is appended before the initial state is created.
        events = create_initial_events(monomers, DEF_RXN_RATES)
        events.append(Event(GROW, [], rate=1e4))
        state = create_initial_state(events, monomers)

        kmc_out = run_kmc(DEF_RXN_RATES, state, events,
                          n_max=12, t_max=2, random_seed=55, sg_ratio=1.0)
        nodes = kmc_out[MONO_LIST]
        adj = kmc_out[ADJ_MATRIX]

        # First call: only interested in whether anything is written to stderr.
        with capture_stderr(generate_mol, adj, nodes) as output:
            self.assertFalse(output)

        # Second call: the mol block itself, split into fragments by RDKit.
        fragments = GetMolFrags(MolFromMolBlock(generate_mol(adj, nodes)))

        chain_counts = analyze_adj_matrix(adj)[CHAIN_LEN]

        # Total number of chains from the matrix analysis must match the number
        # of separate fragments RDKit sees.
        self.assertEqual(np.sum(list(chain_counts.values())), len(fragments))
Beispiel #2
0
def test_embed_r_groups__ROR(bax_mol):
    """Embedding an R-O-R fragment should give both R atoms parent coordinates.

    The fragment's two R atoms start at the origin; after ``embed_r_groups``
    the serialized fragment must equal the expected mol block, in which both
    R atoms carry non-zero coordinates and the oxygen is unchanged.
    """
    expected_block = '''
     RDKit          3D

  3  2  0  0  0  0  0  0  0  0999 V2000
    3.7070    1.4910   20.6340 R   0  0  0  0  0  1  0  0  0  0  0  0
    4.4640    1.0880   19.5620 O   0  0  0  0  0  0  0  0  0  0  0  0
    4.1550    0.2360   18.5580 R   0  0  0  0  0  1  0  0  0  0  0  0
  1  2  1  0
  3  2  1  0
M  END
'''
    ether = MolFromMolBlock('''
RDKit          3D

  3  2  0  0  0  0  0  0  0  0999 V2000
    0.0000    0.0000    0.0000 R   0  0  0  0  0  1  0  0  0  0  0  0
    4.4640    1.0880   19.5620 O   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 R   0  0  0  0  0  1  0  0  0  0  0  0
  1  2  1  0
  3  2  1  0
M  END
    ''')

    # embed_r_groups modifies the fragment in place; its return value is unused.
    embed_r_groups(ether, bax_mol)

    assert MolToMolBlock(ether) == expected_block
Beispiel #3
0
def protonate_molecule(mol_in: Mol, ph=7.4) -> Mol:
    """Add hydrogens to a molecule via Open Babel and return a sanitized RDKit mol.

    The input is serialized to a mol block, hydrogens are added by Open Babel
    with pH correction at ``ph``, and the result is parsed back with RDKit
    (hydrogens kept) and sanitized. If sanitization raises ``ValueError``, the
    whole procedure is repeated once without pH correction.

    Args:
        mol_in: Molecule to protonate.
        ph: pH passed to Open Babel's ``AddHydrogens`` on the first attempt.

    Returns:
        The protonated, sanitized molecule.

    Raises:
        ValueError: If sanitization also fails on the attempt without pH correction.
    """
    molblock_in = MolToMolBlock(mol_in)

    def _with_hydrogens(*add_h_args):
        # Fresh Open Babel molecule each time so a failed attempt leaves no state behind.
        babel_mol = pybel.readstring('mol', molblock_in)
        babel_mol.OBMol.AddHydrogens(*add_h_args)
        return MolFromMolBlock(babel_mol.write('mol'), removeHs=False, sanitize=False)

    mol = _with_hydrogens(False, True, ph)
    try:
        SanitizeMol(mol)
    except ValueError:
        # Try again, but without ph correction
        mol = _with_hydrogens(False, False)
        SanitizeMol(mol)
    return mol
Beispiel #4
0
def test_embed_r_groups__noparentatom(bax_mol):
    """``embed_r_groups`` must raise ``LookupError`` for this fragment.

    The test expects the lookup to fail with a message containing
    'group not found in parent'.
    """
    fragment = MolFromMolBlock('''
     RDKit          3D

  8  7  0  0  0  0  0  0  0  0999 V2000
    0.0000    0.0000    0.0000 R   0  0  0  0  0  1  0  0  0  0  0  0
    1.1111    1.1111   11.1111 N   0  0  0  0  0  0  0  0  0  0  0  0
    0.4480    2.7040   24.2850 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.3160    3.2190   24.7430 H   0  0  0  0  0  0  0  0  0  0  0  0
   -0.0220    3.4870   25.2640 N   0  0  0  0  0  0  0  0  0  0  0  0
   -0.2620    1.9460   23.6160 O   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 R   0  0  0  0  0  1  0  0  0  0  0  0
    0.5870    4.1890   25.5880 H   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  1  0
  3  2  1  0
  2  4  1  0
  3  5  1  0
  3  6  1  0
  7  5  1  0
  5  8  1  0
M  END''')

    with pytest.raises(LookupError) as e:
        embed_r_groups(fragment, bax_mol)

    # The message check must live outside the ``with`` block: statements after
    # the raising call inside the block are never executed.
    assert 'group not found in parent' in str(e.value)
Beispiel #5
0
def convert_molblockgz(molgz):
    """Build an RDKit molecule from a zlib-compressed mol block.

    Args:
        molgz: zlib-compressed mol block data.

    Returns:
        rdkit.Chem.Mol: the molecule parsed from the decompressed mol block.
    """
    molblock = zlib.decompress(molgz)
    return MolFromMolBlock(molblock)
Beispiel #6
0
    def test0InchiWritePubChem(self):
        """Compare InChI strings written by RDKit against PubChem reference InChIs.

        For every molecule in each dataset, the InChI from ``MolToInchi`` is
        compared with the reference looked up by the molecule's
        ``PUBCHEM_COMPOUND_CID`` property. Mismatches are counted as
        "reasonable" when one of the checks below applies; otherwise they are
        counted as real differences and printed. The expected counts are
        asserted once per dataset.
        """
        for fp, f in self.dataset.items():
            inchi_db = self.dataset_inchi[fp]
            same, diff, reasonable = 0, 0, 0
            for m in f:
                if m is None:  # pragma: nocover
                    continue
                ref_inchi = inchi_db[m.GetProp('PUBCHEM_COMPOUND_CID')]
                # x: InChI generated here, y: reference InChI
                x, y = MolToInchi(m), ref_inchi
                if x != y:
                    # print("---------------")
                    # print(m.GetProp('PUBCHEM_COMPOUND_CID'))
                    # print(MolToSmiles(m))
                    # print(y)
                    # print(x)
                    # Mismatch accepted when the generated InChI matches the ClO4 pattern
                    if re.search(r'.[1-9]?ClO4', x) is not None:
                        reasonable += 1
                        continue
                    SanitizeMol(m)
                    # Intended check: molecule has a ring of 8 or more atoms.
                    # NOTE(review): on Python 3, filter() returns an iterator object,
                    # which is always truthy, so this branch would be taken for every
                    # mismatch reaching it -- confirm whether any(...) was intended and
                    # whether the expected counts below still hold after such a change.
                    if filter(lambda i: i >= 8,
                              [len(r) for r in m.GetRingInfo().AtomRings()]):
                        reasonable += 1
                        continue
                    # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage)
                    # if it is because RDKit does not think the bond is stereo
                    # z: InChI after a round trip through a mol block
                    z = MolToInchi(MolFromMolBlock(MolToMolBlock(m)))
                    if y != z and inchiDiffPrefix(y, z) == 'b':
                        reasonable += 1
                        continue
                    # some warning
                    try:
                        MolToInchi(m, treatWarningAsError=True)
                    except InchiReadWriteError as inst:
                        _, error = inst.args
                        if 'Metal' in error:
                            reasonable += 1
                            continue

                    # None of the accepted reasons applied: report a real mismatch
                    diff += 1
                    print('InChI mismatch for PubChem Compound ' +
                          m.GetProp('PUBCHEM_COMPOUND_CID'))
                    print(MolToSmiles(m, True))
                    print(inchiDiff(x, y))
                    print()

                else:
                    same += 1

            # Summary and expected counts are checked for each dataset entry
            fmt = "\n{0}InChI write Summary: {1} identical, {2} suffix variance, {3} reasonable{4}"
            print(fmt.format(COLOR_GREEN, same, diff, reasonable, COLOR_RESET))
            self.assertEqual(same, 1162)
            self.assertEqual(diff, 0)
            self.assertEqual(reasonable, 19)
def calculate_coords(apps, schema_editor):
    """Recompute 2D coordinates for the ctab of every stored compound batch.

    Each ``CBHCompoundBatch`` row has its ``ctab`` parsed into an RDKit
    molecule, 2D coordinates computed, and the regenerated mol block (with
    stereo information) written back and saved. If regenerating the mol block
    fails, the original ``ctab`` is kept and the row is still saved.

    Args:
        apps: App registry used to look up the model via ``get_model``.
        schema_editor: Unused here.
    """
    # We can't import the CBHCompoundBatch model directly as it may be a newer
    # version than this migration expects. We use the historical version.
    Batch = apps.get_model("cbh_chembl_model_extension", "CBHCompoundBatch")
    for batch in Batch.objects.all():
        mol = MolFromMolBlock(batch.ctab)
        AllChem.Compute2DCoords(mol)
        try:
            batch.ctab = MolToMolBlock(mol, includeStereo=True)
        except Exception:
            # Best effort: keep the old ctab. Exception (not a bare except) so
            # that KeyboardInterrupt/SystemExit still propagate.
            # print() with one argument behaves the same on Python 2 and 3.
            print("test")
        batch.save()
def generate_lignin(num_monomers: int = 1) -> Chem.Mol:
    """Simulate lignin growth and return the result as an RDKit molecule.

    A kinetic Monte Carlo run is started from a single randomly drawn monomer
    with an S:G ratio of 0 and allowed to grow up to ``num_monomers`` monomers.
    The resulting adjacency matrix and monomer list are converted to a mol
    block, parsed by RDKit, and explicit hydrogens are added.

    Parameters
    ----------
    num_monomers : int
        Maximum number of monomers in the lignin molecule.

    Returns
    -------
    Chem.Mol
        The generated molecule with hydrogens added.
    """
    sg_ratio = 0
    s_fraction = sg_ratio / (1 + sg_ratio)

    n_start = 1            # monomers present at the start of the simulation
    sim_seconds = 1        # maximum simulated time, in seconds
    growth_rate = 1e4      # monomers added per second

    # Draw random numbers to pick the type of each starting monomer.
    starting_monomers = create_initial_monomers(s_fraction, np.random.rand(n_start))

    # The initial state is derived from the starting events only; GROW is
    # appended afterwards so that further monomers can be added during the run.
    events = create_initial_events(starting_monomers, rxn_rates)
    state = create_initial_state(events, starting_monomers)
    events.append(Event(GROW, [], rate=growth_rate))

    kmc_result = run_kmc(rxn_rates, state, events,
                         n_max=num_monomers, t_max=sim_seconds, sg_ratio=sg_ratio)

    # Convert the simulation output to an RDKit molecule.
    mol_block = generate_mol(kmc_result[ADJ_MATRIX], kmc_result[MONO_LIST])
    return Chem.AddHs(MolFromMolBlock(mol_block))
 def testMakePNG(self):
     """Smoke test: a PNG file is produced from a sample KMC result.

     Only checks that the file gets written, not what the image looks like.
     At last check these settings gave 3 fragments, one of them branched.
     """
     try:
         # Start from a clean slate so the isfile check below is meaningful.
         silent_remove(TEST_PNG)
         kmc_result = create_sample_kmc_result(num_initial_monos=24, max_monos=24, seed=1, max_time=SHORT_TIME)
         adj_analysis_to_stdout(analyze_adj_matrix(kmc_result[ADJ_MATRIX]), break_co_bonds=False)
         mol = MolFromMolBlock(generate_mol(kmc_result[ADJ_MATRIX], kmc_result[MONO_LIST]))
         Compute2DCoords(mol)
         MolToFile(mol, TEST_PNG, size=(2000, 1200))
         self.assertTrue(os.path.isfile(TEST_PNG))
     finally:
         silent_remove(TEST_PNG, disable=DISABLE_REMOVE)
Beispiel #10
0
def produce_output(adj_matrix, mono_list, cfg):
    """Write or print the requested representations of a simulated molecule.

    Unless SMILES output is suppressed and no JSON/PNG/SVG output is requested,
    an RDKit molecule is built from the adjacency matrix and monomer list, and
    its SMILES string is either saved to a file (``cfg[SAVE_SMI]``) or printed.
    Afterwards every enabled format among TCL, JSON, PNG and SVG is written to
    a file whose name is derived from ``cfg[BASENAME]`` and ``cfg[OUT_DIR]``.

    Args:
        adj_matrix: Adjacency matrix passed to ``generate_mol`` / ``gen_tcl``.
        mono_list: Monomer list passed to ``generate_mol`` / ``gen_tcl``.
        cfg: Configuration mapping, indexed by the option constants used below.

    Raises:
        InvalidDataError: If the SMILES string cannot be produced.
    """
    if cfg[SUPPRESS_SMI] and not (cfg[SAVE_JSON] or cfg[SAVE_PNG] or cfg[SAVE_SVG]):
        # Only TCL can be produced without an RDKit molecule.
        format_list = [SAVE_TCL]
        mol = None  # Make IDE happy
    else:
        # Default out is SMILES, which requires getting an rdKit molecule object; also required for everything
        #    except the TCL format
        format_list = [SAVE_TCL, SAVE_JSON, SAVE_PNG, SAVE_SVG]
        block = generate_mol(adj_matrix, mono_list)
        mol = MolFromMolBlock(block)
        try:
            smiles = MolToSmiles(mol)
        except Exception as e:
            # Exception rather than a bare except, so KeyboardInterrupt/SystemExit
            # are not converted; the original error is kept as the cause.
            raise InvalidDataError("Error in producing SMILES string.") from e
        # if SMI is to be saved, don't output to stdout
        if cfg[SAVE_SMI]:
            fname = create_out_fname(cfg[BASENAME], base_dir=cfg[OUT_DIR], ext=SAVE_SMI)
            str_to_file(smiles + '\n', fname, print_info=True)
        else:
            print("\nSMILES representation: \n", smiles, "\n")
        if cfg[SAVE_PNG] or cfg[SAVE_SVG] or cfg[SAVE_JSON]:
            # PNG and SVG make 2D images and thus need coordinates
            # JSON will save coordinates--zero's if not computed; might as well compute and save non-zero values
            Compute2DCoords(mol)

    for save_format in format_list:
        if not cfg[save_format]:
            continue
        fname = create_out_fname(cfg[BASENAME], base_dir=cfg[OUT_DIR], ext=save_format)
        if save_format == SAVE_TCL:
            gen_tcl(adj_matrix, mono_list, tcl_fname=fname, chain_id=cfg[CHAIN_ID],
                    psf_fname=cfg[PSF_FNAME], toppar_dir=cfg[TOPPAR_DIR], out_dir=cfg[OUT_DIR])
        elif save_format == SAVE_JSON:
            json_str = MolToJSON(mol)
            str_to_file(json_str + '\n', fname)
        elif save_format == SAVE_PNG or save_format == SAVE_SVG:
            MolToFile(mol, fname, size=cfg[IMAGE_SIZE])
        print(f"Wrote file: {fname}")
Beispiel #11
0
# Keep only validation rows where both the target property and the UFF mol block are present
valid = valid[valid[[prop, 'molUFF']].notna().all(1)]

# Rescale Y matrix
y_train_raw = train[[prop]].values
y_valid_raw = valid[[prop]].values

# The scaler is fitted on the training targets only and then applied to validation
y_scaler = RobustNanScaler()
y_train_scaled = y_scaler.fit_transform(y_train_raw)
y_valid_scaled = y_scaler.transform(y_valid_raw)

from rdkit.Chem import MolFromMolBlock

# Transform MOLs into X matrix
preprocessor = MolPreprocessor(n_neighbors=48)

# Iterate the Series directly: Series.iteritems() was removed in pandas 2.0,
# and only the values (mol block strings) are needed here.
train_inputs = preprocessor.fit(MolFromMolBlock(mol) for mol in train.molUFF)
# NOTE(review): the validation set also goes through ``fit`` -- confirm whether the
# preprocessor's non-fitting method was intended here.
valid_inputs = preprocessor.fit(MolFromMolBlock(mol) for mol in valid.molUFF)

def rbf_expansion(distances, mu=0, delta=0.2, kmax=150):
    """Expand distances on a grid of ``kmax`` Gaussian-shaped basis functions.

    Basis function ``k`` is centred at ``-mu + delta * k``; the value for a
    distance ``d`` is ``exp(-(d - centre)**2 / delta)``.

    Returns an array with one row per distance and ``kmax`` columns.
    """
    centers = -mu + delta * np.arange(0, kmax)
    # Column vector of distances so that subtraction broadcasts against the centres.
    as_column = np.atleast_2d(distances).T
    squared_offsets = (as_column - centers) ** 2
    return np.exp(-squared_offsets / delta)

def precalc_rbfs(inputs):
    """Replace each item's 'distance' entry with its RBF expansion, in place.

    Every item gets a new 'distance_rbf' key holding
    ``rbf_expansion(item['distance'])`` and loses its 'distance' key.
    Progress is reported through ``tqdm``. Returns ``inputs`` itself.
    """
    for entry in tqdm(inputs):
        raw_distances = entry['distance']
        entry['distance_rbf'] = rbf_expansion(raw_distances)
        # Drop the raw distances only after the expansion has been stored.
        del entry['distance']
    return inputs
import numpy as np
import pandas as pd
from rdkit.Chem import MolFromMolBlock

# Command-line interface: a single flag selecting whether hydrogens are removed on parsing.
parser = ArgumentParser()
parser.add_argument('--removeHs', '-k', action='store_true', help='remove H atoms when generating mol')


if __name__ == '__main__':
    args = parser.parse_args()

    ids = []
    mols = []
    for i in range(1, 133886):
        mol_id = f'dsgdb9nsd_{i:06}'
        ids.append(mol_id)
        try:
            with open(f'./data/input/structures/{mol_id}.mol', 'r') as mol_file:
                mols.append(MolFromMolBlock(mol_file.read(), removeHs=args.removeHs))
        except Exception:
            # Best effort: a structure that cannot be read or parsed becomes NaN.
            # Exception (not a bare except) so Ctrl-C can still stop the loop.
            mols.append(np.nan)

    df = pd.DataFrame()
    df['ids'] = ids
    df['mols'] = mols

    # The output file name reflects whether hydrogens were removed.
    pickle_name = 'mols_without_Hs' if args.removeHs else 'mols_with_Hs'
    with open(f'./data/input/{pickle_name}.pickle', 'wb') as f:
        pickle.dump(df, f)
Beispiel #13
0
def bax_mol():
    """Return the RDKit molecule parsed from the embedded V2000 mol block.

    The mol block declares 48 atoms and 50 bonds with 3D coordinates, including
    explicit hydrogens. Tests in this file take a ``bax_mol`` argument, so this
    is presumably registered as a pytest fixture (decorator not visible here).
    """
    return MolFromMolBlock('''
RDKit          3D

 48 50  0  0  0  0  0  0  0  0999 V2000
   -1.9780    2.2590   25.9480 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.2280    3.4280   25.8570 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.6980    4.5910   26.4520 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.9200    4.6600   27.1000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.4480    2.7040   24.2850 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.3380    2.3780   22.9000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.6560    2.2710   21.6650 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.3330    1.8220   20.5270 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.7070    1.4910   20.6340 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.3820    1.5940   21.8510 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.6920    2.0440   22.9670 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.4640    1.0880   19.5620 O   0  0  0  0  0  0  0  0  0  0  0  0
    3.7130   -1.0290   18.8880 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.4170   -1.9280   17.8630 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.0410   -0.3090   16.2360 C   0  0  1  0  0  0  0  0  0  0  0  0
    4.3280    0.6230   17.2290 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.1010   -1.0340   13.8920 N   0  0  0  0  0  0  0  0  0  0  0  0
    3.2530   -1.0040   12.7050 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.7100    3.5350   27.1680 C   0  0  1  0  0  0  0  0  0  0  0  0
   -3.2190    2.2650   26.5840 C   0  0  0  0  0  0  0  0  0  0  0  0
   -4.0500    0.9970   26.6980 C   0  0  1  0  0  0  0  0  0  0  0  0
   -5.2380    1.1420   26.2110 F   0  0  0  0  0  0  0  0  0  0  0  0
   -3.4800    0.0010   26.1220 F   0  0  0  0  0  0  0  0  0  0  0  0
   -4.2250    0.6490   27.9280 F   0  0  0  0  0  0  0  0  0  0  0  0
   -5.2760    3.5990   28.0420 Cl  0  0  0  0  0  0  0  0  0  0  0  0
   -0.0220    3.4870   25.2640 N   0  0  0  0  0  0  0  0  0  0  0  0
    1.7480    2.8090   24.0510 N   0  0  0  0  0  0  0  0  0  0  0  0
   -0.2620    1.9460   23.6160 O   0  0  0  0  0  0  0  0  0  0  0  0
    4.1550    0.2360   18.5580 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.5810   -1.5370   16.5780 N   0  0  0  0  0  0  0  0  0  0  0  0
    4.1310    0.0000   14.7620 C   0  0  1  0  0  0  0  0  0  0  0  0
    4.1550    1.1730   14.3910 O   0  0  0  0  0  0  0  0  0  0  0  0
   -1.1120    5.4400   26.4100 H   0  0  0  0  0  0  0  0  0  0  0  0
    2.3160    3.2190   24.7430 H   0  0  0  0  0  0  0  0  0  0  0  0
    0.5870    4.1890   25.5880 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.0780   -2.8780   18.0820 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.6590    1.5710   16.9900 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.6630   -1.8240   14.0630 H   0  0  0  0  0  0  0  0  0  0  0  0
    0.6580    2.5250   21.6040 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.2350    5.5450   27.5270 H   0  0  0  0  0  0  0  0  0  0  0  0
    2.7140   -0.0800   12.6750 H   0  0  0  0  0  0  0  0  0  0  0  0
    2.5610   -1.8190   12.7400 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.8620   -1.0910   11.8290 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.1900    2.1340   23.8660 H   0  0  0  0  0  0  0  0  0  0  0  0
    5.3800    1.3390   21.9200 H   0  0  0  0  0  0  0  0  0  0  0  0
    1.8410    1.7330   19.6240 H   0  0  0  0  0  0  0  0  0  0  0  0
   -1.6140    1.3830   25.5420 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.6030   -1.3050   19.8760 H   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  1  0
  1 20  1  0
  1 47  1  0
  2  3  1  0
  2 26  1  0
  3  4  1  0
  3 33  1  0
  4 19  1  0
  4 40  1  0
  5 26  1  0
  5 27  1  0
  5 28  1  0
  6  7  1  0
  6 11  1  0
  6 27  1  0
  7  8  1  0
  7 39  1  0
  8  9  1  0
  8 46  1  0
  9 10  1  0
  9 12  1  0
 10 11  1  0
 10 45  1  0
 11 44  1  0
 12 29  1  0
 13 14  1  0
 13 29  1  0
 13 48  1  0
 14 30  1  0
 14 36  1  0
 15 16  1  0
 15 30  1  1
 15 31  1  0
 16 29  1  0
 16 37  1  0
 17 18  1  0
 17 31  1  0
 17 38  1  0
 18 41  1  0
 18 42  1  0
 18 43  1  0
 19 20  1  0
 19 25  1  1
 20 21  1  0
 21 22  1  6
 21 23  1  0
 21 24  1  0
 26 35  1  0
 27 34  1  0
 31 32  1  6
M  END
    ''')
Beispiel #14
0
    def process(
            self,
            input_file: str,
            output_file: str = "",
            output_file_sdf: str = "",
            sdf_append: bool = False,
            #images_prefix: str = "",
            format_output: bool = True,
            write_header: bool = True,
            osra_output_format: str = "",
            output_formats: list = None,
            dry_run: bool = False,
            csv_delimiter: str = ";",
            use_gm: bool = True,
            gm_dpi: int = 300,
            gm_trim: bool = True,
            n_jobs: int = -1,
            input_type: str = "",
            standardize_mols: bool = True,
            annotate: bool = True,
            chemspider_token: str = "",
            custom_page: int = 0,
            continue_on_failure: bool = False) -> OrderedDict:
        r"""
        Process the input file with OSRA.

        Parameters
        ----------
        input_file : str
            Path to file to be processed by OSRA.
        output_file : str
            File to write output in.
        output_file_sdf : str
            | File to write SDF output in. "sdf" output format hasn't to be in `output_formats` to write SDF output.
            | If "sdf_osra" output format is requested, suffix "-osra.sdf" will be added.
        sdf_append : bool
            If True, append new molecules to existing SDF file or create new one if doesn't exist.
        NOT IMPLEMENTED | images_prefix : str
            Prefix for images of extracted compounds which will be written.
        format_output : bool
            | If True, the value of "content" key of returned dict will be list of OrderedDicts.
            | If True and `output_file` is set, the CSV file will be written.
            | If False, the value of "content" key of returned dict will be None.
        write_header : bool
            If True and if `output_file` is set and `output_format` is True, write a CSV write_header.
        osra_output_format : str
            | Output format from OSRA. Temporarily overrides the option `output_format` set during instantiation (in __init__).
            | Choices: "smi", "can", "sdf"
            | If "sdf", additional information like coordinates cannot be retrieved (not implemented yet).
        output_formats : list
            | If True and `format_output` is also True, this specifies which molecule formats will be output.
            | You can specify more than one format, but only one format from OSRA. This format must be also set with `output_format` in __init__
              or with `osra_output_format` here.
            | When output produces by OSRA is unreadable by RDKit, you can at least have that output from OSRA.
            | Default value: ["smiles"]

            +-----------------+--------------+--------------------------------------------------------------------------------------------+
            |      Value      |    Source    |                                            Note                                            |
            +=================+==============+============================================================================================+
            |      smiles     |     RDKit    |                                          canonical                                         |
            +-----------------+--------------+--------------------------------------------------------------------------------------------+
            |   smiles_osra   | OSRA ("smi") |                                           SMILES                                           |
            +-----------------+--------------+--------------------------------------------------------------------------------------------+
            | smiles_can_osra | OSRA ("can") |                                      canonical SMILES                                      |
            +-----------------+--------------+--------------------------------------------------------------------------------------------+
            |      inchi      |     RDKit    | Not every molecule can be converted to InChI (it doesn`t support wildcard characters etc.) |
            +-----------------+--------------+--------------------------------------------------------------------------------------------+
            |     inchikey    |     RDKit    |                              The same applies as for "inchi".                              |
            +-----------------+--------------+--------------------------------------------------------------------------------------------+
            |       sdf       |     RDKit    |                     If present, an additional SDF file will be created.                    |
            +-----------------+--------------+--------------------------------------------------------------------------------------------+
            |     sdf_osra    | OSRA ("sdf") |                     If present, an additional SDF file will be created.                    |
            +-----------------+--------------+--------------------------------------------------------------------------------------------+

        dry_run : bool
            If True, only return list of commands to be called by subprocess.
        csv_delimiter : str
            Delimiter for output CSV file.
        use_gm : bool
            | If True, use GraphicsMagick to convert PDF to temporary PNG images before processing.
            | If False, OSRA will use it's own conversion of PDF to image.
            | Using gm is more reliable since OSRA (v2.1.0) is showing wrong information
              when converting directly from PDF (namely: coordinates, bond length and possibly more ones) and also there are sometimes
              incorrectly recognised structures.
        gm_dpi : int
            How many DPI will temporary PNG images have.
        gm_trim : bool
            If True, gm will trim the temporary PNG images.
        n_jobs : int
            | If `use_gm` and input file is PDF, how many jobs to use for OSRA processing of temporary PNG images.
            | If -1 all CPUs are used.
            | If 1 is given, no parallel computing code is used at all, which is useful for debugging.
            | For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used.
        input_type : str
            | When empty, input (MIME) type will be determined from magic bytes.
            | Or you can specify "pdf" or "image" and magic bytes check will be skipped.
        standardize_mols : bool
            If True and `format_output` is also True, use molvs (https://github.com/mcs07/MolVS) to standardize molecules.
        annotate : bool
            | If True, try to annotate entities in PubChem and ChemSpider. Compound IDs will be assigned by searching with
              each identifier, separately for SMILES, InChI etc.
            | If entity has InChI key yet, prefer it in searching.
            | If "*" is present in SMILES, skip annotation.
        chemspider_token : str
            Your personal token for accessing the ChemSpider API. Make account there to obtain it.
        custom_page : bool
            When `use_gm` is False, this will set the page for all extracted compounds.
        continue_on_failure : bool
            | If True, continue running even if OSRA returns non-zero exit code.
            | If False and error occurs, print it and return.

        Returns
        -------
        dict
            Keys:

            - stdout: str ... standard output from OSRA
            - stderr: str ... standard error output from OSRA
            - exit_code: int ... exit code from OSRA
            - content:

                - list of OrderedDicts ... when `format_output` is True.
                - None ... when `format_output` is False

            | If `osra_output_format` is "sdf", additional information like 'bond_length' cannot be retrieved.
            | If `use_gm` is True then stdout, stderr and exit_code will be lists containing items from each temporary image
              extracted by OSRA.

        Notes
        -----
        Only with `format_output` set to True you can use molecule standardization and more molecule formats. Otherwise
        you will only get raw stdout from OSRA (which can also be written to file if `output_file` is set).
        """

        options_internal = self.options_internal.copy()
        osra_smiles_outputs = ["smi", "can"]

        # OSRA output format check
        if osra_output_format:
            options_internal["output_format"] = osra_output_format
        else:
            osra_output_format = options_internal["output_format"]

        osra_valid_output_formats = {
            "can": "smiles_can_osra",
            "smi": "smiles_osra",
            "sdf": "sdf_osra"
        }
        if osra_output_format not in osra_valid_output_formats:
            raise ValueError(
                "Unknown OSRA output format. Possible values: {}".format(
                    osra_valid_output_formats.values()))

        if osra_output_format == "sdf":
            self.logger.warning(
                "OSRA's output format is set to \"sdf\" so additional information like coordinates cannot be retrieved."
            )

        # output formats check
        is_output_sdf = False
        is_output_sdf_osra = False
        if not output_formats:
            output_formats = ["smiles"]
        else:
            output_formats = sorted(list(set(output_formats)))
            possible_output_formats = ["smiles", "inchi", "inchikey", "sdf"]
            output_formats = [
                x for x in output_formats if x in possible_output_formats
                or x == osra_valid_output_formats[osra_output_format]
            ]

            if ("sdf" in output_formats
                    or "sdf_osra" in output_formats) and not output_file_sdf:
                self.logger.warning(
                    "Cannot write SDF output: 'output_file_sdf' is not set.")
            if output_file_sdf:
                is_output_sdf = True
            if "sdf_osra" in output_formats and osra_output_format == "sdf" and output_file_sdf:
                is_output_sdf_osra = True
            if ("smiles_osra" in output_formats or "smiles_can_osra"
                    in output_formats) and osra_output_format == "sdf":
                try:
                    output_formats.remove("smiles_osra")
                except ValueError:
                    pass
                try:
                    output_formats.remove("smiles_can_osra")
                except ValueError:
                    pass
                self.logger.warning(
                    "SMILES or canonical SMILES output from OSRA is requested, but OSRA's output format is \"{}\"."
                    .format(osra_output_format))

        # input file type check
        possible_input_types = ["pdf", "image"]
        if not input_type:
            input_type = get_input_file_type(input_file)
            if input_type not in possible_input_types:
                use_gm = False
                self.logger.warning(
                    "Input file MIME type ('{}') is not one of {}. You can specify 'input_type' directly (see docstring)."
                    .format(input_type, possible_input_types))
        elif input_type not in possible_input_types:
            raise ValueError("Possible 'input_type' values are {}".format(
                possible_input_types))

        #options = ChainMap({k: v for k, v in {"images_prefix": images_prefix}.items() if v},
        #                   options_internal)

        if annotate:
            if not chemspider_token:
                self.logger.warning(
                    "Cannot perform annotation in ChemSpider: 'chemspider_token' is empty."
                )
            [
                output_formats.append(x)
                for x in ["smiles", "inchi", "inchikey"]
                if x not in output_formats
            ]
            output_formats = sorted(output_formats)

        commands, _, _ = self.build_commands(options_internal,
                                             self._OPTIONS_REAL,
                                             self.path_to_binary)
        commands.extend(
            ["--bond", "--coordinates", "--page", "--guess", "--print"])

        if dry_run:
            return " ".join(commands)

        osra_output_list = []
        if input_type == "image" or not use_gm:
            osra_output_list.append(
                self._process(input_file,
                              commands,
                              page=custom_page if custom_page else 1))
        elif input_type == "pdf":
            with tempfile.TemporaryDirectory() as temp_dir:
                stdout, stderr, exit_code = pdf_to_images(input_file,
                                                          temp_dir,
                                                          dpi=gm_dpi,
                                                          trim=gm_trim)
                osra_output_list = Parallel(n_jobs=n_jobs)(
                    delayed(self._process)(
                        temp_image_file, commands, page=page)
                    for temp_image_file, page in get_temp_images(temp_dir))

        # summarize OSRA results
        to_return = {
            "stdout": [],
            "stderr": [],
            "exit_code": [],
            "content": None,
            "pages": []
        }
        for result in osra_output_list:
            if result["stdout"]:
                to_return["stdout"].append(result["stdout"])
                to_return["stderr"].append(result["stderr"])
                to_return["exit_code"].append(result["exit_code"])
                to_return["pages"].append(result["page"])

        if not continue_on_failure:
            errors = [(page + 1, error)
                      for page, (exit_code, error) in enumerate(
                          zip(to_return["exit_code"], to_return["stderr"]))
                      if exit_code > 0]
            if errors:
                self.logger.warning("OSRA errors:")
                for page, error in errors:
                    eprint("\tError on page {}:".format(page))
                    eprint("\n\t\t".join("\n{}".format(error).splitlines()))
                return to_return

        if not format_output:
            if output_file:
                with open(output_file, mode="w", encoding="utf-8") as f:
                    f.write("\n".join(to_return["stdout"]))
            return to_return

        output_cols = OrderedDict([("bond_length", 1), ("resolution", 2),
                                   ("confidence", 3), ("page", 4),
                                   ("coordinates", 5)])

        if osra_output_format in osra_smiles_outputs:
            compound_template_dict = OrderedDict.fromkeys(
                output_formats + list(output_cols.keys()))
        else:
            compound_template_dict = OrderedDict.fromkeys(["page"] +
                                                          output_formats)

        if any(to_return["stdout"]):
            if standardize_mols:
                standardizer = Standardizer()

            compounds = []

            if is_output_sdf:
                if sdf_append:
                    if not os.path.isfile(output_file_sdf):
                        open(output_file_sdf, mode="w",
                             encoding="utf-8").close()
                    writer = SDWriter(
                        open(output_file_sdf, mode="a", encoding="utf-8"))
                else:
                    writer = SDWriter(output_file_sdf)

            for output, page in zip(to_return["stdout"], to_return["pages"]):
                if osra_output_format in osra_smiles_outputs:
                    lines = [x.strip() for x in output.split("\n") if x]
                else:
                    lines = [x for x in output.split("$$$$") if x.strip()]

                for line in lines:
                    """
                    # so much problems with --learn
                    # we can't simply split output by " " when --learn is present, because its output is like "1,2,2,2 1"
                    if "learn" in filtered_cols:
                        learn_start = filtered_cols.index("learn") + 1 #  "smiles" col isn't in output_cols
                        learn_end = filtered_cols.index("learn") + 1 + 3
                        line[learn_start:learn_end] = [" ".join(line[learn_start:learn_end])]
                    """

                    if not line:
                        continue

                    if osra_output_format in osra_smiles_outputs:
                        line = [x.strip() for x in line.split()]
                        if custom_page:
                            line[output_cols["page"]] = custom_page
                        elif use_gm:
                            line[output_cols["page"]] = page
                        mol = MolFromSmiles(
                            line[0],
                            sanitize=False if standardize_mols else True)
                    elif osra_output_format == "sdf":
                        line = "\n" + line.strip()
                        mol = MolFromMolBlock(
                            line,
                            strictParsing=False,
                            sanitize=False if standardize_mols else True,
                            removeHs=False if standardize_mols else True)

                    if mol:
                        compound = compound_template_dict.copy()

                        if standardize_mols:
                            try:
                                mol = standardizer.standardize(mol)
                            except ValueError as e:
                                self.logger.warning(
                                    "Cannot standardize '{}': {}".format(
                                        MolToSmiles(mol), str(e)))

                        for f in output_formats:
                            if f == "smiles":
                                compound["smiles"] = MolToSmiles(
                                    mol, isomericSmiles=True)
                            elif f == "smiles_osra" and osra_output_format == "smi":
                                compound["smiles_osra"] = line[0]
                            elif f == "smiles_can_osra" and osra_output_format == "can":
                                compound["smiles_can_osra"] = line[0]
                            elif f == "inchi":
                                inchi = MolToInchi(mol)
                                if inchi:
                                    compound["inchi"] = inchi
                                else:
                                    compound["inchi"] = ""
                                    self.logger.warning(
                                        "Cannot convert to InChI: {}".format(
                                            MolToSmiles(mol)))
                            elif f == "inchikey":
                                inchi = MolToInchi(mol)
                                if inchi:
                                    compound["inchikey"] = InchiToInchiKey(
                                        inchi)
                                else:
                                    compound["inchikey"] = ""
                                    self.logger.warning(
                                        "Cannot create InChI-key from InChI: {}"
                                        .format(MolToSmiles(mol)))
                            elif f == "sdf":
                                compound["sdf"] = MolToMolBlock(
                                    mol, includeStereo=True)
                            elif f == "sdf_osra":
                                compound["sdf_osra"] = line

                        if is_output_sdf:
                            writer.write(mol)

                        if osra_output_format in osra_smiles_outputs:
                            compound.update([(x[0], x[1]) for x in zip(
                                list(output_cols.keys()), line[1:])])
                        else:
                            compound[
                                "page"] = page if use_gm else custom_page if custom_page else 1

                        compounds.append(compound)
                    else:
                        self.logger.warning("Cannot convert to RDKit mol: " +
                                            line[0])

            if is_output_sdf_osra:
                with open(output_file_sdf + "-osra.sdf",
                          mode="w",
                          encoding="utf-8") as f:
                    f.write("".join(to_return["stdout"]))

            to_return["content"] = sorted(compounds, key=lambda x: x["page"])

            if annotate:
                chemspider = ChemSpider(
                    chemspider_token) if chemspider_token else None

                for i, ent in enumerate(to_return["content"]):
                    self.logger.info("Annotating entity {}/{}...".format(
                        i + 1, len(to_return["content"])))
                    ent.update(
                        OrderedDict([("pch_cids_by_inchikey", ""),
                                     ("chs_cids_by_inchikey", ""),
                                     ("pch_cids_by_smiles", ""),
                                     ("chs_cids_by_smiles", ""),
                                     ("pch_cids_by_inchi", ""),
                                     ("chs_cids_by_inchi", ""),
                                     ("pch_iupac_name", ""),
                                     ("chs_common_name", ""),
                                     ("pch_synonyms", "")]))

                    results = []

                    # prefer InChI key
                    if "inchikey" in ent and ent["inchikey"]:
                        try:
                            results = get_compounds(ent["inchikey"],
                                                    "inchikey")
                            if results:
                                if len(results) == 1:
                                    result = results[0]
                                    synonyms = result.synonyms
                                    if synonyms:
                                        ent["pch_synonyms"] = "\"{}\"".format(
                                            "\",\"".join(synonyms))
                                    ent["pch_iupac_name"] = result.iupac_name
                                ent["pch_cids_by_inchikey"] = "\"{}\"".format(
                                    ",".join([str(c.cid) for c in results]))
                        except (BadRequestError, NotFoundError,
                                PubChemHTTPError, ResponseParseError,
                                ServerError, TimeoutError, PubChemPyError):
                            pass

                        results = chemspider.search(
                            ent["inchikey"]) if chemspider_token else []
                        if results:
                            if len(results) == 1:
                                result = results[0]
                                ent["chs_common_name"] = result.common_name
                            ent["chs_cids_by_inchikey"] = "\"{}\"".format(
                                ",".join([str(c.csid) for c in results]))
                    else:
                        for search_field, col_pch, col_chs in [
                            ("smiles", "pch_cids_by_smiles",
                             "chs_cids_by_smiles"),
                            ("inchi", "pch_cids_by_inchi", "chs_cids_by_inchi")
                        ]:
                            results_pch = []
                            results_chs = []

                            if search_field == "smiles" and "smiles" in ent and ent[
                                    "smiles"] and "*" not in ent["smiles"]:
                                try:
                                    results_pch = get_compounds(
                                        ent["smiles"], "smiles")
                                except (BadRequestError, NotFoundError,
                                        PubChemHTTPError, ResponseParseError,
                                        ServerError, TimeoutError,
                                        PubChemPyError):
                                    pass
                                results_chs = chemspider.search(
                                    ent["smiles"]) if chemspider_token else []
                            elif search_field == "inchi" and "inchi" in ent and ent[
                                    "inchi"]:
                                try:
                                    results_pch = get_compounds(
                                        ent["inchi"], "inchi")
                                except (BadRequestError, NotFoundError,
                                        PubChemHTTPError, ResponseParseError,
                                        ServerError, TimeoutError,
                                        PubChemPyError):
                                    pass
                                results_chs = chemspider.search(
                                    ent["inchi"]) if chemspider_token else []

                            if results_pch:
                                ent[col_pch] = "\"{}\"".format(",".join(
                                    [str(c.cid) for c in results_pch]))
                            if results_chs:
                                ent[col_chs] = "\"{}\"".format(",".join(
                                    [str(c.csid) for c in results_chs]))

                            sleep(0.5)

            if output_file:
                dict_to_csv(to_return["content"],
                            output_file=output_file,
                            csv_delimiter=csv_delimiter,
                            write_header=write_header)

            if is_output_sdf:
                writer.close()
        elif not any(to_return["stdout"]) and output_file:
            write_empty_file(output_file,
                             csv_delimiter=csv_delimiter,
                             header=list(compound_template_dict.keys()),
                             write_header=write_header)

        return to_return
Beispiel #15
0
    def test1InchiReadPubChem(self):
        """Round-trip each dataset molecule through InChI and tally the outcome.

        For every molecule ``m`` in every entry of ``self.dataset`` the InChI
        ``x`` is generated, parsed back into a molecule, pushed through
        SMILES and converted to InChI again (``y``).  Each molecule is then
        counted as:

        * ``same``       -- ``x == y``;
        * ``reasonable`` -- the round trip differs (or fails) for one of the
          tolerated causes checked below;
        * ``diff``       -- an unexplained failure or mismatch.

        The totals per dataset entry are asserted at the end.

        NOTE(review): the big-ring check used to be ``if filter(...)``, which
        is always truthy on Python 3 (``filter`` returns an iterator), so
        every mismatch that reached it was counted as ``reasonable``.  It now
        tests the ring sizes for real; if the expected totals were calibrated
        with the old behaviour they may need to be re-baselined.
        """

        def radical_weight(mol):
            # Sum of (radical electrons * atomic number) over all atoms that
            # carry radical electrons; used to compare two molecules.
            return sum(a.GetNumRadicalElectrons() * a.GetAtomicNum()
                       for a in mol.GetAtoms()
                       if a.GetNumRadicalElectrons() != 0)

        for f in self.dataset.values():
            same, diff, reasonable = 0, 0, 0
            for m in f:
                if m is None:  # pragma: nocover
                    continue
                x = MolToInchi(m)
                y = None
                # Silence RDKit error logging while parsing, and always
                # switch it back on, even if the parser raises.
                RDLogger.DisableLog('rdApp.error')
                try:
                    mol = MolFromInchi(x)
                finally:
                    RDLogger.EnableLog('rdApp.error')
                if mol is not None:
                    y = MolToInchi(
                        MolFromSmiles(MolToSmiles(mol, isomericSmiles=True)))
                if y is None:
                    # metal involved?
                    try:
                        MolToInchi(m, treatWarningAsError=True)
                    except InchiReadWriteError as inst:
                        _, error = inst.args
                        if 'Metal' in error or \
                                'Charges were rearranged' in error:
                            reasonable += 1
                            continue
                    # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage)
                    # RDKit does not like the SMILES? use MolBlock instead
                    inchiMol = MolFromInchi(x)
                    if inchiMol:
                        rdDepictor.Compute2DCoords(inchiMol)
                        z = MolToInchi(MolFromMolBlock(
                            MolToMolBlock(inchiMol)))
                        if x == z:
                            reasonable += 1
                            continue
                    # InChI messed up the radical?
                    unsanitizedInchiMol = MolFromInchi(x, sanitize=False)
                    if radical_weight(m) != radical_weight(
                            unsanitizedInchiMol):
                        reasonable += 1
                        continue

                    diff += 1
                    cid = m.GetProp('PUBCHEM_COMPOUND_CID')
                    print(COLOR_GREEN + 'Empty mol for PubChem Compound ' +
                          cid + '\n' + COLOR_RESET)
                    continue
                if x != y:
                    # if there was warning in the first place, then this is
                    # tolerable
                    try:
                        MolToInchi(m, treatWarningAsError=True)
                        MolFromInchi(x, treatWarningAsError=True)
                    except InchiReadWriteError:
                        reasonable += 1
                        continue
                    # or if there are big rings
                    SanitizeMol(m)
                    # any() instead of a bare filter(): a filter object is
                    # truthy even when it would yield nothing.
                    if any(len(r) >= 8
                           for r in m.GetRingInfo().AtomRings()):
                        reasonable += 1
                        continue
                    # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage)
                    # or if RDKit loses bond stereo
                    s = MolToSmiles(m, True)
                    if MolToSmiles(MolFromSmiles(s), True) != s:
                        reasonable += 1
                        continue
                    # or if it is RDKit SMILES writer unhappy about the mol
                    inchiMol = MolFromInchi(x)
                    # Same None guard as the "y is None" branch above.
                    if inchiMol:
                        rdDepictor.Compute2DCoords(inchiMol)
                        z = MolToInchi(
                            MolFromMolBlock(MolToMolBlock(inchiMol)))
                        if x == z:
                            reasonable += 1
                            continue

                    diff += 1
                    # Look the id up here: it was previously only bound in
                    # the "y is None" branch, so this report either raised
                    # NameError or printed a stale id from an earlier mol.
                    cid = m.GetProp('PUBCHEM_COMPOUND_CID')
                    print(COLOR_GREEN +
                          'Molecule mismatch for PubChem Compound ' + cid +
                          COLOR_RESET)
                    print(inchiDiff(x, y))
                    print()
                else:
                    same += 1
            fmt = "\n{0}InChI read Summary: {1} identical, {2} variance, {3} reasonable variance{4}"
            print(fmt.format(COLOR_GREEN, same, diff, reasonable, COLOR_RESET))
            self.assertEqual(same, 621)
            self.assertEqual(diff, 0)
            self.assertEqual(reasonable, 560)
Beispiel #16
0
    def save(self, force_insert=False, force_update=False, *args, **kwargs):
        """Persist the structure, refreshing derived identifiers first.

        When ``settings.OPEN_SOURCE`` is set, the InChI key is recomputed
        from ``standard_inchi`` with RDKit and, if it changed, ``molfile`` is
        round-tripped through RDKit.  In this branch the row is only saved
        when ``standard_inchi`` parses into a molecule, and
        ``canonical_smiles`` is not refreshed.

        Otherwise the InChI, InChI key and canonical SMILES are taken from
        ``getStructure(self.molfile)`` whenever the InChI differs from the
        stored one.

        In both branches, an already-existing row that has a ``molfile`` is
        saved once up front (presumably so the database update trigger runs
        before the derived fields are rewritten -- confirm against the
        schema).  If anything changed, the parent ``molecule`` is updated and
        the ``structureChanged`` signal is sent.

        Parameters
        ----------
        force_insert, force_update, *args, **kwargs
            Passed through unchanged to the parent class ``save``.

        Raises
        ------
        NoStandardInchi
            If ``standard_inchi`` is empty when it is needed.
        """

        changed = False
        # exists() answers "is there such a row?" without counting rows.
        new = not CompoundStructures.objects.filter(pk=self.pk).exists()
        if settings.OPEN_SOURCE:
            if self.molfile:
                if not new: # The structure already exists and we only want to modify it
                    super(CompoundStructures, self).save(force_insert, force_update, *args, **kwargs) # this should trigger CMPD_STR_UPDATE_TRIG, which deletes compound images and properties and nulls standard inchi, key, smiles, and molformula
                    changed = True

            # Check before the first dereference: this test used to sit
            # inside ``if mol:`` where it could never fire, so a missing
            # InChI surfaced as AttributeError (None) or a silent no-save
            # (empty string) instead of NoStandardInchi.
            if not self.standard_inchi:
                raise NoStandardInchi("for CompundStructure, pk = " + str(self.pk))

            mol = MolFromInchi(self.standard_inchi.encode("ascii"))
            if mol:
                newInchiKey = InchiToInchiKey(self.standard_inchi.encode("ascii"))
                if self.standard_inchi_key != newInchiKey:
                    self.standard_inchi_key = newInchiKey
                    changed = True
                    self.molfile = MolToMolBlock(MolFromMolBlock(str(self.molfile))) # This is how we do kekulisation in RDKit...

                self.clean_fields()
                self.validate_unique()
                super(CompoundStructures, self).save(force_insert, force_update, *args, **kwargs)

        else:
            if self.molfile:
                if not new: # The structure already exists and we only want to modify it
                    super(CompoundStructures, self).save(force_insert, force_update, *args, **kwargs) # this should trigger CMPD_STR_UPDATE_TRIG, which deletes compound images and properties and nulls standard inchi, key, smiles, and molformula
                    changed = True

                data = getStructure(self.molfile)

                newInchi = data['InChI']
                if newInchi != self.standard_inchi:
                    self.standard_inchi = newInchi
                    self.standard_inchi_key = data['InChIKey']
                    self.canonical_smiles = data['Canonical_Smiles']
                    changed = True

            if not self.standard_inchi:
                raise NoStandardInchi("for CompundStructure, pk = " + str(self.pk))

            if not self.standard_inchi_key:
                self.standard_inchi_key = InchiToInchiKey(self.standard_inchi.encode("ascii"))

            self.clean_fields()
            self.validate_unique()
            super(CompoundStructures, self).save(force_insert, force_update, *args, **kwargs)

        if changed:
            # Propagate the new key to the parent molecule and notify
            # listeners that the structure changed.
            self.molecule.structure_key = self.standard_inchi_key
            self.molecule.structure_type = "MOL"
            self.molecule.molfile_update = datetime.now()
            self.molecule.save()
            structureChanged.send(sender=self.__class__, instance=self)
Beispiel #17
0
# Rescale Y matrix
# Select the target column named by `prop` from the training and validation
# tables.  The list selector `[[prop]]` presumably keeps the result
# two-dimensional (one column) -- confirm `train`/`valid` are DataFrames.
y_train_raw = train[[prop]].values
y_valid_raw = valid[[prop]].values

# Fit the scaler on the training targets only, then apply that same fitted
# scaler to the validation targets.
y_scaler = RobustNanScaler()
y_train_scaled = y_scaler.fit_transform(y_train_raw)
y_valid_scaled = y_scaler.transform(y_valid_raw)

from rdkit.Chem import MolFromMolBlock

# Transform MOLs into X matrix
# Each entry of the `mol` column is parsed from a MOL block into an RDKit
# molecule and streamed lazily into the preprocessor.
preprocessor = MolPreprocessor(n_neighbors=48)

# `.items()` replaces `.iteritems()`, which was removed in pandas 2.0; both
# yield (index, value) pairs.
train_inputs = preprocessor.fit(
    (MolFromMolBlock(mol) for _, mol in train.mol.items()))
# NOTE(review): the validation set also goes through `fit`, not a
# transform-style call -- confirm this is what MolPreprocessor expects and
# that it does not re-fit state on validation data.
valid_inputs = preprocessor.fit(
    (MolFromMolBlock(mol) for _, mol in valid.mol.items()))


def rbf_expansion(distances, mu=0, delta=0.2, kmax=150):
    """Expand distances onto a grid of ``kmax`` radial basis functions.

    The k-th basis function is centred at ``-mu + delta * k`` for
    ``k = 0 .. kmax - 1`` and evaluates to
    ``exp(-(d - centre)**2 / delta)``.

    Parameters
    ----------
    distances : scalar or array-like
        Distances to expand; promoted to 2-D and transposed, so a 1-D input
        of length ``n`` becomes a column of ``n`` rows.
    mu : number
        Offset subtracted from every centre.
    delta : number
        Spacing between centres, also used as the divisor in the exponent.
    kmax : int
        Number of basis functions.

    Returns
    -------
    numpy.ndarray
        For 1-D input of length ``n``, an array of shape ``(n, kmax)``.
    """
    centres = -mu + delta * np.arange(0, kmax)
    column = np.atleast_2d(distances).T
    offsets = column - centres
    return np.exp(-offsets**2 / delta)


def precalc_rbfs(inputs):
    """Attach an RBF expansion of ``'distance'`` to every item, in place.

    Each element of ``inputs`` is a mapping; its ``'distance'`` entry is
    expanded with ``rbf_expansion`` (default settings) and the result is
    stored under ``'distance_rbf'``.  Progress is shown with ``tqdm``.
    Nothing is returned.
    """
    for entry in tqdm(inputs):
        expanded = rbf_expansion(entry['distance'])
        entry['distance_rbf'] = expanded
Beispiel #18
0
def predict(request: HttpRequest) -> Union[HttpResponseBadRequest, JsonResponse]:
    """
    Run the requested models on the posted molecules and return the results.

    The POST data must contain exactly one of 'molblocks' (a list of
    molblocks) or 'smiles' (a list of SMILES), plus 'models' with the names
    of the models to use for prediction.

    Parameters
    ----------
    request : HttpRequest
        Incoming POST request as HttpRequest instance

    Returns
    -------
    Union[HttpResponseBadRequest, JsonResponse]
        A HttpResponseBadRequest if a required parameter is missing, a
        model is unknown, the molecule list is empty or contains an entry
        that cannot be parsed, or every molecule is dropped by the
        descriptor filter. Otherwise a JsonResponse mapping each remaining
        row index to a dict of per-model prediction values.
    """
    post = request.POST

    # Exactly one of the two molecule inputs must be present.
    has_molblocks = 'molblocks' in post
    if has_molblocks == ('smiles' in post):
        return HttpResponseBadRequest('molblocks OR smiles have to be provided')
    if 'models' not in post:
        return HttpResponseBadRequest('model(s) have to be provided')
    model_names = set(post.getlist('models'))

    # Collect the descriptors and fingerprints needed by the requested models.
    descs = []
    fps = []
    for name in model_names:
        if name not in model_manager.models:
            return HttpResponseBadRequest(f'{name} not available')
        descs.extend(model_manager.models[name].descriptors)
        fps.extend(model_manager.models[name].fingerprints)
    descs = list(set(descs))
    fps = list(set(fps))

    # Pick the raw input list, its parser and its "empty" message.
    if has_molblocks:
        raw_entries = post.getlist('molblocks')
        parse = MolFromMolBlock
        empty_message = 'List of molblocks is empty'
    else:
        raw_entries = post.getlist('smiles')
        parse = MolFromSmiles
        empty_message = 'List of SMILES is empty'
    if not raw_entries or raw_entries == ['']:
        return HttpResponseBadRequest(empty_message)

    mols = []
    for text in raw_entries:
        if text == '':
            # A blank entry is invalid; stop parsing, the check below rejects it.
            mols.append(None)
            break
        mols.append(parse(text))

    if None in mols:
        return HttpResponseBadRequest('molblocks or smiles contain invalid entries')

    df = pd.DataFrame.from_dict(dict(ROMol=mols))
    df = rdkit_support.compute_descriptors(df, descs)
    bad_ix, bad_desc = rdkit_support.filter_descriptor_values(df[descs])
    if len(bad_ix) > 0:
        df.drop(index=bad_ix, inplace=True)
        if not len(df):
            return HttpResponseBadRequest('Every molecule leads to bad descriptor values')
    df = fplib.compute_fingerprints(df, fps)
    df.drop(columns='ROMol', inplace=True)

    # With 'ROMol' dropped, the columns after the first len(descs) are the
    # fingerprint columns.
    fp_columns = df.columns[len(descs):]

    res = {ix: {} for ix in df.index}
    for name in model_names:
        m = model_manager.models[name]
        cols = copy(m.descriptors)
        for fp in m.fingerprints:
            prefix = f'{fp.alias}['
            cols.extend(col for col in fp_columns if col.startswith(prefix))
        pred = m.predict(df[cols])
        pred.columns = [col.replace(f'{name}_', '') for col in pred.columns]
        for ix, row in pred.to_dict('index').items():
            res[ix][name] = row

    return JsonResponse(res)