Beispiel #1
0
def generate_fingerprint(smiles):
    """Return the distinct tokens of a compound's bitmap fingerprint.

    The fingerprint object is rendered with ``str()``, its first and last
    characters are dropped and the remainder is split on ``', '``.
    (Presumably the tokens are the indices of the set bits -- confirm against
    ``Compound.get_bitmap_fingerprint``.)

    Args:
        smiles: SMILES string of the compound. Any falsy value (``''``,
            ``None``) yields an empty result without touching ``Compound``.

    Returns:
        A list of unique, non-empty token strings. The list is built from a
        set, so its order is arbitrary.
    """
    if not smiles:
        return []

    compound = Compound(compound_string=smiles, identifier_type='smiles', suppress_hydrogens=True)
    fingerprint = compound.get_bitmap_fingerprint()
    tokens = str(fingerprint)[1:-1].split(', ')
    # A fingerprint that renders as just its two delimiters leaves '' after
    # slicing, and ''.split(', ') is [''] -- drop such blank tokens so an
    # empty fingerprint gives [] instead of [''].
    return list({token for token in tokens if token})
Beispiel #2
0
 def get_compound_container(self, chem_id, suppress_hydrogens=True):
     """Build a ``Compound`` from an identifier that is either SMILES or InChI.

     The identifier is parsed as SMILES first. If that raises ``ValueError``
     it is parsed as InChI instead; a ``ValueError`` from the InChI attempt
     propagates to the caller.

     Args:
         chem_id: the structure identifier string.
         suppress_hydrogens: forwarded unchanged to ``Compound``.

     Returns:
         The constructed ``Compound`` instance.
     """
     compound_kwargs = {'compound_string': chem_id, 'suppress_hydrogens': suppress_hydrogens}
     try:
         return Compound(identifier_type='smiles', **compound_kwargs)
     except ValueError:
         # Not valid SMILES: fall back to InChI. The call stays inside the
         # handler so a second failure is raised from the same context.
         return Compound(identifier_type='inchi', **compound_kwargs)
Beispiel #3
0
def default_fingerprint_from_inchi(inchi):
    """Return the compound's fingerprint as a 1024-element numpy array.

    A ``Compound`` is built from the InChI string, its ``get_fingerprint()``
    result is queried with ``.get(i)`` for ``i`` in ``0..1023`` and each
    answer is stored at position ``i`` of a ``numpy.zeros(1024)`` array.

    Args:
        inchi: InChI string of the compound.

    Returns:
        ``numpy.ndarray`` of length 1024.
    """
    # NOTE: the length is fixed; other fingerprint types/sizes are not
    # accommodated here.
    bit_count = 1024
    bitset = Compound(compound_string=inchi, identifier_type='inchi').get_fingerprint()
    bits = numpy.zeros(bit_count)
    for position in range(bit_count):
        bits[position] = bitset.get(position)
    return bits
Beispiel #4
0
def desalt_compound(smiles):
    """Split a multi-component SMILES on '.' and look up each part's InChI key.

    Args:
        smiles: SMILES string, possibly containing several '.'-separated
            components. A falsy value yields two empty lists.

    Returns:
        A ``(fragments, inchi_keys)`` tuple of equally long lists: the
        individual SMILES components, and for each one its InChI key, or
        ``''`` when building the compound or computing the key raised.
    """
    fragments = []
    inchi_keys = []
    if not smiles:
        return fragments, inchi_keys

    for fragment in smiles.split('.'):
        fragments.append(fragment)
        try:
            key = Compound(compound_string=fragment, identifier_type='smiles').get_inchi_key()
        except Exception:
            # Best effort: keep the lists aligned with a blank placeholder.
            key = ''
        inchi_keys.append(key)

    return fragments, inchi_keys
Beispiel #5
0
    def get(self):
        """Return an SVG rendering of the structure given in the query string.

        Reads the ``structure`` request argument. When the ``format`` argument
        equals ``'inchikey'``, a free-text search is run and, if a hit's
        ``id`` equals the given value, that hit's ``smiles`` is used instead.
        The structure is then parsed as SMILES, falling back to InChI.

        Returns:
            ``(response, 200)`` with JSON keys ``status`` and ``compound_svg``
            on success, or ``(response, 400)`` with ``status`` / ``message``
            when the InChI fallback also fails.
        """
        params = request.args
        structure = params['structure']

        if params.get('format') == 'inchikey':
            # NOTE(review): ('reframe') is a plain string, not a 1-tuple --
            # confirm which one exec_freetext_search expects.
            hits = SearchAPI.exec_freetext_search(structure, indices=('reframe'))
            # First hit whose id matches supplies the SMILES; otherwise the
            # value is used as given.
            structure = next(
                (hit['smiles'] for hit in hits if hit['id'] == structure),
                structure)

        try:
            compound = Compound(compound_string=structure, identifier_type='smiles')
        except Exception:
            try:
                print('executed inchi conv')
                compound = Compound(compound_string=structure, identifier_type='inchi')
                # Result unused; the call is made only so that a failure
                # here is reported as invalid input.
                compound.get_inchi_key()
            except Exception as err:
                print(err)
                failure = {
                    'status': 'fail',
                    'message': 'Invalid SMILES or InChI'
                }
                return make_response(jsonify(failure)), 400

        payload = {
            'status': 'success',
            'compound_svg': compound.get_svg()
        }
        return make_response(jsonify(payload)), 200
Beispiel #6
0
    def get(self):
        """Return the molfile of the structure given in the query string.

        Reads the ``compound_structure`` request argument and parses it as
        SMILES, falling back to InChI when the SMILES attempt raises.

        Returns:
            ``(response, 200)`` with JSON keys ``status`` and ``molfile`` on
            success, or ``(response, 400)`` with ``status`` / ``message`` when
            the InChI fallback also fails.
        """
        structure = request.args['compound_structure']

        try:
            compound = Compound(compound_string=structure, identifier_type='smiles')
        except Exception:
            try:
                compound = Compound(compound_string=structure, identifier_type='inchi')
                # Result unused; the call is made only so that a failure
                # here is reported as invalid input.
                compound.get_inchi_key()
            except Exception as err:
                print(err)
                failure = {
                    'status': 'fail',
                    'message': 'Invalid SMILES or InChI'
                }
                return make_response(jsonify(failure)), 400

        payload = {
            'status': 'success',
            'molfile': compound.get_molfile()
        }
        return make_response(jsonify(payload)), 200
Beispiel #7
0
def main(datapath):
    """Compute fingerprints for every InChI in the GNPS data file.

    Loads ``<datapath>/data_GNPS.mat``, builds a ``Compound`` for each entry
    of its ``'inchi'`` field, copies bits 0..1023 of the compound's
    fingerprint into a numpy array and finally pickles the list of arrays to
    ``<datapath>/cdk_fingerprints.bin``.

    Args:
        datapath: directory containing the input file; the output file is
            written to the same directory.
    """
    bit_count = 1024
    dataset = scipy.io.loadmat(datapath + '/data_GNPS.mat')
    fingerprints = []

    for record in dataset['inchi']:
        print('Processing sample %s' % record)
        # The InChI string sits two levels deep inside each loaded record.
        inchi_string = record[0][0]
        print(inchi_string)
        bitset = Compound(compound_string=inchi_string, identifier_type='inchi').get_fingerprint()
        bits = numpy.zeros(bit_count)
        for position in range(bit_count):
            bits[position] = bitset.get(position)
        fingerprints.append(bits)

    with open(datapath + '/cdk_fingerprints.bin', 'wb') as out_file:
        pickle.dump(fingerprints, out_file)
Beispiel #8
0
def generate_fingerprint(smiles, compound_id, main_label, qid):
    """Compute a compound's fingerprint tokens and register them in the map.

    The fingerprint object is rendered with ``str()``, its first and last
    characters are dropped and the remainder is split on ``', '``.

    Side effect on the module-level ``compound_id_fp_map``:

    * unknown ``compound_id``: an entry
      ``(compound_id, main_label, qid, fingerprint_token_set)`` is added;
    * known ``compound_id`` whose stored label (position 1) is just the
      compound id: the label is replaced by ``main_label``.

    Args:
        smiles: SMILES string of the compound. Any falsy value yields ``[]``
            and leaves the map untouched.
        compound_id: key under which the compound is registered.
        main_label: human-readable label for the compound.
        qid: additional identifier stored alongside the label.

    Returns:
        A list of unique, non-empty fingerprint tokens in arbitrary order.
    """
    if not smiles:
        return []

    compound = Compound(compound_string=smiles,
                        identifier_type='smiles',
                        suppress_hydrogens=True)
    fingerprint = compound.get_bitmap_fingerprint()
    tokens = str(fingerprint)[1:-1].split(', ')
    # ''.split(', ') is [''], so drop blank tokens from an empty fingerprint.
    fp = {token for token in tokens if token}

    # if only compound id is set as a label, try to set something more useful
    if compound_id in compound_id_fp_map:
        sim_item = compound_id_fp_map[compound_id]
        if sim_item[1] == compound_id:
            if isinstance(sim_item, tuple):
                # Entries created below are tuples, which do not support item
                # assignment -- store a replacement entry instead.
                compound_id_fp_map[compound_id] = (
                    (sim_item[0], main_label) + sim_item[2:])
            else:
                # Mutable entries (e.g. lists added elsewhere) are still
                # updated in place.
                sim_item[1] = main_label
    else:
        compound_id_fp_map[compound_id] = (compound_id, main_label, qid, fp)

    return list(fp)
Beispiel #9
0
    def get(self):
        """Return an SVG rendering of the structure given in the query string.

        Reads the ``structure`` request argument and parses it as SMILES; if
        that raises ``ValueError`` it is parsed as InChI instead.

        Returns:
            ``(response, 200)`` with JSON keys ``status`` and ``compound_svg``
            on success, or ``(response, 400)`` with ``status`` / ``message``
            when the value is neither valid SMILES nor valid InChI.
        """
        args = request.args
        compound_structure = args['structure']

        try:
            compound = Compound(compound_string=compound_structure,
                                identifier_type='smiles')
        except ValueError:
            try:
                compound = Compound(compound_string=compound_structure,
                                    identifier_type='inchi')
            except ValueError:
                response = {
                    'status': 'fail',
                    'message': 'Invalid SMILES or InChI'
                }
                # Malformed client input is 400 Bad Request; 401 would tell
                # the client that authentication is required.
                return make_response(jsonify(response)), 400

        svg_xml = compound.get_svg()

        response = {'status': 'success', 'compound_svg': svg_xml}

        return make_response(jsonify(response)), 200
Beispiel #10
0
def fingerprint_from_inchi(inchi, fingerprint_type=None):
    """Compute a bit fingerprint of the requested type for an InChI.

    Args:
        inchi: InChI string of the compound.
        fingerprint_type: one of ``'cdk_default'``, ``'substructure'`` or
            ``'klekota-roth'``.

    Returns:
        ``numpy.ndarray`` whose length is the fingerprint's ``size()`` and
        whose element ``i`` holds the fingerprint's ``get(i)`` value.

    Raises:
        SystemExit: if ``fingerprint_type`` is none of the supported names
            (this includes the default ``None``).
    """
    # (type name, fingerprinter class name inside compound.cdk.fingerprint)
    known_fingerprinters = (
        ('cdk_default', 'Fingerprinter'),
        ('substructure', 'SubstructureFingerprinter'),
        ('klekota-roth', 'KlekotaRothFingerprinter'),
    )

    # The compound is built before the type is checked, so an unparsable
    # InChI is reported ahead of an unknown fingerprint type.
    compound = Compound(compound_string=inchi, identifier_type='inchi')

    class_name = next(
        (name for key, name in known_fingerprinters if fingerprint_type == key),
        None)
    if class_name is None:
        raise SystemExit('Unknown fingerprint type: {}'.format(fingerprint_type))

    fingerprinter = getattr(compound.cdk.fingerprint, class_name)()
    bitset = fingerprinter.getBitFingerprint(compound.mol_container)
    bit_count = bitset.size()
    bits = numpy.zeros(bit_count)
    for position in range(bit_count):
        bits[position] = bitset.get(position)
    return bits
Beispiel #11
0
def fingerprint_from_smiles(smiles, fingerprint_type=None):
    """Compute a bit fingerprint (or all of them, concatenated) for a SMILES.

    Args:
        smiles: SMILES string of the compound.
        fingerprint_type: ``'cdk_default'``, ``'substructure'`` or
            ``'klekota-roth'``. With ``None`` the three fingerprints are
            computed in that order and concatenated into one array.

    Returns:
        ``numpy.ndarray`` whose element ``i`` holds the fingerprint's
        ``get(i)`` value (for ``None``: the horizontal stack of all three).

    Raises:
        SystemExit: if ``fingerprint_type`` is neither ``None`` nor one of
            the supported names.
    """
    # (type name, fingerprinter class name inside compound.cdk.fingerprint)
    known_fingerprinters = (
        ('cdk_default', 'Fingerprinter'),
        ('substructure', 'SubstructureFingerprinter'),
        ('klekota-roth', 'KlekotaRothFingerprinter'),
    )

    if fingerprint_type is None:
        # Start from an empty array so the stacked result has its dtype.
        pieces = [numpy.array([])]
        pieces.extend(
            fingerprint_from_smiles(smiles, fp_type)
            for fp_type in ('cdk_default', 'substructure', 'klekota-roth'))
        return numpy.hstack(pieces)

    # The compound is built before the type is checked, so an unparsable
    # SMILES is reported ahead of an unknown fingerprint type.
    compound = Compound(compound_string=smiles, identifier_type='smiles')

    class_name = next(
        (name for key, name in known_fingerprinters if fingerprint_type == key),
        None)
    if class_name is None:
        raise SystemExit('Unknown fingerprint type: {}'.format(fingerprint_type))

    fingerprinter = getattr(compound.cdk.fingerprint, class_name)()
    bitset = fingerprinter.getBitFingerprint(compound.mol_container)
    bit_count = bitset.size()
    bits = numpy.zeros(bit_count)
    for position in range(bit_count):
        bits[position] = bitset.get(position)
    return bits
def main():
    """Exercise the ``Compound`` wrapper on sample InChIs and SMILES.

    For every sample the available conversions (SMILES, InChI, InChI key,
    mol2, fingerprint and, for the SMILES samples, Tanimoto similarity to a
    fixed reference compound and the molfile) are printed. A ``ValueError``
    raised for a SMILES sample is printed and the loop continues.
    """
    test_inchis = [
        'InChI=1S/C23H18ClF2N3O3S/c1-2-9-33(31,32)29-19-8-7-18(25)20(21(19)26)22(30)17-12-28-23-16(17)10-14(11-27-23)13-3-5-15(24)6-4-13/h3-8,10-12,29H,2,9H2,1H3,(H,27,28)',
        'InChI=1S/C33H42N4O6/c1-7-20-19(6)32(42)37-27(20)14-25-18(5)23(10-12-31(40)41)29(35-25)15-28-22(9-11-30(38)39)17(4)24(34-28)13-26-16(3)21(8-2)33(43)36-26/h15,26-27,35H,7-14H2,1-6H3,(H,36,43)(H,37,42)(H,38,39)(H,40,41)/b28-15-/t26-,27-/m0/s1',
        'InChI=1S/C21H25ClFN3O3/c1-2-28-20-10-19(24)18(22)9-17(20)21(27)25-11-16-13-26(7-8-29-16)12-14-3-5-15(23)6-4-14/h3-6,9-10,16H,2,7-8,11-13,24H2,1H3,(H,25,27)',
        'InChI=1S/C16H12FN3O3/c1-19-14-7-6-10(20(22)23)8-12(14)16(18-9-15(19)21)11-4-2-3-5-13(11)17/h2-8H,9H2,1H3',
        'InChI=1S/C10H17N3O6S/c11-5(10(18)19)1-2-7(14)13-6(4-20)9(17)12-3-8(15)16/h5-6,20H,1-4,11H2,(H,12,17)(H,13,14)(H,15,16)(H,18,19)/t5-,6-/m0/s1',
        'InChI=1S/C13H16N2O/c1-8-13-11(5-6-14-8)10-4-3-9(16-2)7-12(10)15-13/h3-4,7-8,14-15H,5-6H2,1-2H3',
        'InChI=1S/C27H44O2/c1-19-10-13-23(28)18-22(19)12-11-21-9-7-17-27(5)24(14-15-25(21)27)20(2)8-6-16-26(3,4)29/h11-12,20,23-25,28-29H,1,6-10,13-18H2,2-5H3/b21-11+,22-12-/t20-,23+,24-,25+,27-/m1/s1',
        'InChI=1S/C40H56/c1-31(19-13-21-33(3)25-27-37-35(5)23-15-29-39(37,7)8)17-11-12-18-32(2)20-14-22-34(4)26-28-38-36(6)24-16-30-40(38,9)10/h11-14,17-23,25-28,37H,15-16,24,29-30H2,1-10H3/b12-11+,19-13+,20-14+,27-25+,28-26+,31-17+,32-18+,33-21+,34-22+/t37-/m0/s1',
        'InChI=1S/C11H14N4O5/c1-14-3-13-9-6(10(14)19)12-4-15(9)11-8(18)7(17)5(2-16)20-11/h3-5,7-8,11,16-18H,2H2,1H3',
        'InChI=1S/C27H44O2/c1-18(2)8-6-9-19(3)24-13-14-25-21(10-7-15-27(24,25)5)11-12-22-16-23(28)17-26(29)20(22)4/h11-12,18-19,23-26,28-29H,4,6-10,13-17H2,1-3,5H3/b21-11+,22-12-/t19-,23-,24-,25+,26+,27-/m1/s1',
        'InChI=1S/C9H14N5O4P/c1-6(18-5-19(15,16)17)2-14-4-13-7-8(10)11-3-12-9(7)14/h3-4,6H,2,5H2,1H3,(H2,10,11,12)(H2,15,16,17)/t6-/m1/s1',
        'InChI=1S/C51H79NO13/c1-30-16-12-11-13-17-31(2)42(61-8)28-38-21-19-36(7)51(60,65-38)48(57)49(58)52-23-15-14-18-39(52)50(59)64-43(33(4)26-37-20-22-40(53)44(27-37)62-9)29-41(54)32(3)25-35(6)46(56)47(63-10)45(55)34(5)24-30/h11-13,16-17,25,30,32-34,36-40,42-44,46-47,53,56,60H,14-15,18-24,26-29H2,1-10H3/b13-11+,16-12+,31-17+,35-25+/t30-,32-,33-,34-,36-,37+,38+,39+,40-,42+,43+,44-,46-,47+,51-/m1/s1',
    ]

    for inchi in test_inchis:

        cmpnd = Compound(compound_string=inchi, identifier_type='inchi')
        print(cmpnd.get_smiles())
        print(cmpnd.get_inchi_key())
        print(cmpnd.get_inchi())
        print(cmpnd.get_mol2())
        print(cmpnd.get_fingerprint())
        print('----------------------------')

    # group of compounds with same connectivity but different configuration:
    # https://pubchem.ncbi.nlm.nih.gov/rest/rdf/inchikey/MNQDKWZEUULFPX-UHFFFAOYSA-M.html
    #
    # SMILES containing a backslash bond are written as raw strings so that
    # no backslash is ever read as (or mistaken for) an escape sequence.
    smiles = [
        '[Ba++].[O-][Fe]([O-])(=O)=O',
        'CCN1C2=CC=CC=C2SC1=CC=CC=CC3=[N+](C4=CC=CC=C4S3)CC.[I-]',
        r'CCN\1C2=CC=CC=C2S/C1=C\C=C\C=C\C3=[N+](C4=CC=CC=C4S3)CC.[I-]',
        r'CCN\1C2=CC=CC=C2S/C1=C/C=C/C=C/C3=[N+](C4=CC=CC=C4S3)CC.[I-]',
        r'CCN\1C2=CC=CC=C2S/C1=C\C=C\C=C/C3=[N+](C4=CC=CC=C4S3)CC.[I-]',
        r'CCN\1C2=CC=CC=C2S/C1=C/C=C/C=CC3=[N+](C4=CC=CC=C4S3)CC.[I-]',
        r'CC1=CC=CC=C1OCC2=CC=CC=C2/C(=N\OC)/C(=O)OC',
        r'CCCCCC/C=C\CCCCCCCC(=O)O',
        'CC(C)(C)c1nc(c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(s1)c4ccnc(N)n4',
        'CC(C)(C)C1=NC(C2=CC=CC(NS(=O)(=O)N(C)(CC))=C2F)=C(S1)C4=CC=NC(N)=N4',
        'C1=CC2=C(C=C1O)C(=CN2)C[C@@H](C(=O)[O-])[NH3+]',
        r'CN/C(=C\[N+](=O)[O-])/NCCSCC1=CC=C(O1)CN(C)C',
        'CN/C(=C/[N+](=O)[O-])/NCCSCC1=CC=C(O1)CN(C)C',
        'COCCOC[C@H](CC1(CCCC1)C(=O)N[C@@H]2CC[C@@H](CC2)C(=O)O)C(=O)Oc3ccc4CCCc4c3',
        'C1=C(N=C(S1)N=C(N)N)CSCC/C(=N/S(=O)(=O)N)/N',
        'C[C@]([C@H]1C[C@@]23CC[C@@]1([C@H]4[C@@]25CCN([C@@H]3CC6=C5C(=C(C=C6)O)O4)CC7CC7)OC)(C(C)(C)CC)O',
        'CC(=O)O[Hg]c1cc(ccc1O)C(CC(C)(C)C)(C)C',
        'CC(=O)O.CC(C)(C)CC(C)(C)[C]1C=CC(=C=C1)[O-].[Hg+]',
        r'N/C(N)=C([N+]([O-])=O)\[N+]([O-])=O',
        'CC(C)C1=C(C(=C(N1CC[C@H](C[C@H](CC(=O)O)O)O)C2=CC=C(C=C2)F)C3=CC=CC=C3)C(=O)NC4=CC=CC=C4',
        'c1cc(ccc1/N=N/c2ccc(c(c2)OS(=O)O)N)OS(=O)O.[Na+].[Na+]',
        r'Clc1ccc2Nc4ccccc4C(=N\c2c1)/N3CCNCC3',
        '[Yb][Yb][Yb][Ag][Ag]',
        # The comma after the next entry was missing, which silently glued
        # it to the following SMILES as one string.
        'N[C@@H](CSSC[C@H](N)C(O)=O)C(O)=O',
        r'CC1(C\2CCC1(C(=O)/C2=C/c3ccc(cc3)C=O)CS(=O)(=O)[O-])C.[Na+]',
        'CNC(=O)C1=CC=CC=C1NC2=NC(=NC=C2Cl)NC3=CC=C(C=C3)N4CCN(CCCN)CC4',
        r'OC(=O)CN/C(=N\c1ccc(C#N)cc1)NC2CCCCCCCC2',
        'N[C@@]12C[C@]3(O[N+]([O-])=O)C[C@@](C2)(CC)C[C@@](C1)(CC)C3',
        'C1C2CC3CC1(ON(OO))CC(C2)(C3)N',
        '[N+](=O)([O-])OC12CC3(CC(CC(C1)(C3)N)(C2)CC)CC',
        'COc1cc(c(cc1C(=O)N[C@@H]2CC[N@@]3CCC[C@H]2C3)Cl)N',
        'OCN(C(=O)N(CO)C)',
        '[O-][n+]1cc[n+](c2c1cccc2)[O-]',
        '[2H]C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C(O)=O',
        'CS(O)(=O)=O.[H][C@@]12CC(C)C(C(=O)CN3CCN(CC3)c3cc(nc(n3)N3CCCC3)N3CCCC3)[C@@]1(C)CC=C1[C@@]2([H])CCC2=CC(=O)C=C[C@]12C',
        'OCCCC(O)=O',
        'Cc1nnc(s1)SCC2=C(N3[C@@H]([C@@H](C3=O)NC(=O)Cn4cnnn4)SC2)C(=O)[O-]',
        'CC(=O)Oc1ccc(cc1)C(c1ccc(OC(C)=O)cc1)c1ccccn1',
    ]

    # Compound every SMILES sample is compared against.
    reference_smiles = 'C1C2CC3CC1(ON(OO))CC(C2)(C3)N'

    for smile in smiles:
        try:
            cmpnd = Compound(compound_string=smile, identifier_type='smiles')
            print(cmpnd.get_smiles(smiles_type='isomeric'))
            print(cmpnd.get_smiles(smiles_type='unique'))
            print(cmpnd.get_smiles(smiles_type='absolute'))
            print(cmpnd.get_smiles(smiles_type='generic'))
            print(cmpnd.get_inchi_key())
            print(cmpnd.get_inchi())
            print(cmpnd.get_mol2())
            print(cmpnd.get_fingerprint())
            print(
                cmpnd.get_tanimoto(
                    Compound(compound_string=reference_smiles,
                             identifier_type='smiles')))
            print(
                cmpnd.get_tanimoto_from_bitset(
                    Compound(compound_string=reference_smiles,
                             identifier_type='smiles')))
            print(cmpnd.get_molfile())
            print('----------------------------')

        except ValueError as e:
            print(e)

    cmpnd = Compound(
        compound_string=
        'InChI=1S/C5H10N2O3/c6-3(5(9)10)1-2-4(7)8/h3H,1-2,6H2,(H2,7,8)(H,9,10)/p-1',
        identifier_type='inchi')
    print(cmpnd.get_smiles(smiles_type='generic'))
    print(cmpnd.get_inchi_key())
    print(cmpnd.get_inchi())
Beispiel #13
0
# Convert MIBiG SMILES to InChI keys
#
# mibig_inchi_keys maps the first block of an InChI key (the text before the
# first '-') to a list of
#     ((mibig_id, compound_name, pubchem_id), inchi_key, smiles)
# entries, one per CSV row that produced that first block.
mibig_inchi_keys = {}
# CSV rows whose SMILES could not be turned into an InChI key.
failing_smiles = []
# newline='' is what the csv module asks for so that newlines embedded in
# quoted fields are handled by the reader itself.
with open('compunds_structures_2.0.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    # The first row is always skipped (presumably the column header).
    next(reader, None)
    for row in reader:
        if not row:
            continue
        mibig_id, compound_name, smiles, pubchem_id = row
        if smiles == '':
            continue
        try:
            # Building the compound is inside the try as well, so that a
            # SMILES that cannot be parsed is recorded as failing instead of
            # aborting the whole conversion.
            mibig_compound = Compound(compound_string=smiles,
                                      identifier_type='smiles')
            mibig_inchi_key = mibig_compound.get_inchi_key()
        except Exception:
            failing_smiles.append(row)
            continue
        mibig_inchi_first_block = mibig_inchi_key.split('-')[0]
        mibig_inchi_keys.setdefault(mibig_inchi_first_block, []).append(
            ((mibig_id, compound_name, pubchem_id), mibig_inchi_key, smiles))