예제 #1
0
    def test_targets_to_mols(self):
        # For each case: resolve the target names, fetch the molecules for
        # those targets and check that a known subset shows up in the
        # serialised line for the chosen output format.

        def fetch_molecules(target_names):
            # The first resolve_target() hit is used for every name.
            return targets_to_mols([
                resolve_target(name)[0]['target_chembl_id']
                for name in target_names
            ])

        molecules = fetch_molecules(['POLK'])
        writer = get_serializer('chembl_id')
        expected_ids = {
            'CHEMBL6', 'CHEMBL25', 'CHEMBL10', 'CHEMBL30',
            'CHEMBL50', 'CHEMBL71', 'CHEMBL28', 'CHEMBL66',
        }
        found_ids = set(writer.serialize_line(molecules).strip().split(','))
        self.assertTrue(expected_ids <= found_ids)

        molecules = fetch_molecules(['HERG'])
        writer = get_serializer('smiles')
        expected_smiles = {
            'CN1C(=O)N(CC(=O)c2ccccc2)C(=O)c3c1nc(N4CCC[C@H](N)C4)n3CC=C(C)C',
            'C[C@]12CCC(=O)C=C1CC[C@@H]3[C@@H]2CC[C@@]4(C)[C@H]3CC[C@@]4(O)C#C',
            'CS(=O)(=O)N1CCC(CN([C@@H]2CC[C@@]3(CC3C2)c4cccc(c4)C#N)C(=O)Nc5ccc(F)c(F)c5)CC1',
            'O=S(=O)(NCCCN1CCN(CC1)c2nsc3ccccc23)c4cccc5scnc45',
            'Cl.CC(C)n1nc(C(=O)NCC2CCN(CCNS(=O)(=O)C)CC2)c3ccccc13',
            'CC1CN(CC(=O)N[C@@H]2C3CC4CC2C[C@@](C4)(C3)C(=O)N)S(=O)(=O)N(C1)c5c(Cl)cc(Cl)cc5Cl',
            'Oc1cccc2CC(CN3CCC4(CC3)CCc5ccccc45)NCc12',
            '[O-][N+](=O)c1ccc(cc1)c2nc(c3ccc(F)cc3)c([nH]2)c4ccncc4',
        }
        # The SMILES line is split on any whitespace.
        found_smiles = set(writer.serialize_line(molecules).strip().split())
        self.assertTrue(expected_smiles <= found_smiles)

        molecules = fetch_molecules(['CMKAR1'])
        writer = get_serializer('inchi')
        expected_inchis = {
            'InChI=1S/C7H6O2/c8-7(9)6-4-2-1-3-5-6/h1-5H,(H,8,9)',
            'InChI=1S/C11H7NS/c13-8-12-11-7-3-5-9-4-1-2-6-10(9)11/h1-7H',
            'InChI=1S/CHCl3/c2-1(3)4/h1H',
            'InChI=1S/H4N2/c1-2/h1-2H2',
            'InChI=1S/C8H8N4/c9-11-8-7-4-2-1-3-6(7)5-10-12-8/h1-5H,9H2,(H,11,12)',
            'InChI=1S/CH4N2O2/c2-1(4)3-5/h5H,(H3,2,3,4)',
            'InChI=1S/CH3O5P/c2-1(3)7(4,5)6/h(H,2,3)(H2,4,5,6)',
            'InChI=1S/C4H11NO3/c5-4(1-6,2-7)3-8/h6-8H,1-3,5H2',
            'InChI=1S/C9H9Cl2N3O/c10-6-2-1-3-7(11)5(6)4-8(15)14-9(12)13/h1-3H,4H2,(H4,12,13,14,15)',
        }
        # InChI strings contain commas, so this line is split on tabs.
        found_inchis = set(
            writer.serialize_line(molecules).strip().split('\t'))
        self.assertTrue(expected_inchis <= found_inchis)

        molecules = fetch_molecules(['ADRA2L1', 'VIPR1'])
        writer = get_serializer('inchi_key')
        expected_keys = {
            'OELFLUMRDSZNSF-BRWVUGGUSA-N',
            'KJPRLNWUNMBNBZ-QPJJXVBHSA-N',
            'VEPKQEUBKLEPRA-UHFFFAOYSA-N',
            'ACGUYXCXAPNIKK-UHFFFAOYSA-N',
            'MVGSNCBCUWPVDA-MFOYZWKCSA-N',
            'JZHXLYHEWUWHQL-LVYIWIAJSA-N',
            'CUVBGWMAORETGV-UHFFFAOYSA-N',
            'DVSMVUMYJDOPJQ-UHFFFAOYSA-N',
        }
        found_keys = set(writer.serialize_line(molecules).strip().split(','))
        self.assertTrue(expected_keys <= found_keys)
예제 #2
0
def main():
    """Resolve every non-blank input line and write one serialized line per name.

    Names are read from ``options.input`` when it is set, otherwise from
    stdin; results go to ``options.output`` when it is set, otherwise to
    stdout. When ``get_serializer`` returns nothing for the requested format
    the problem is reported on stderr and the function returns early.
    """
    options = get_options()
    with open(options.input) if options.input else sys.stdin as in_f, \
            open(options.output, 'w') if options.output else sys.stdout as out_f:

        serializer_cls = get_serializer(options.format)
        if not serializer_cls:
            # write() accepts exactly one string; passing the format as a
            # second argument raised TypeError instead of printing the error.
            sys.stderr.write(
                'Unsupported format: {0}\n'.format(options.format))
            return

        if options.human:
            serializer_cls.write_header(out_f)

        for line in in_f:
            name = line.strip()
            if not name:
                continue
            try:
                resolved = resolve(name, options.single)
            except Exception:
                # Best effort: a name that cannot be resolved is passed on as
                # None so the remaining input is still processed.
                resolved = None
            if options.parent:
                resolved = get_parents(resolved)
            out_f.write(
                serializer_cls.serialize_line(resolved,
                                              human=options.human,
                                              name=name))
예제 #3
0
 def test_inchi_serialiser(self):
     # The test expects the 'inchi' serializer to write a 'Name:'/'InChI:'
     # header and to put both InChI strings of 'Vigamox' on one tab-separated
     # line, prefixed by the name only when a name is passed.
     first_inchi = (
         'InChI=1S/C21H24FN3O4/c1-29-20-17-13(19(26)14(21(27)28)9-25(17)12-4-5-12)'
         '7-15(22)18(20)24-8-11-3-2-6-23-16(11)10-24'
         '/h7,9,11-12,16,23H,2-6,8,10H2,1H3,(H,27,28)/t11-,16+/m0/s1')
     second_inchi = (
         'InChI=1S/C21H24FN3O4.ClH/c1-29-20-17-13(19(26)14(21(27)28)9-25(17)12-4-5-12)'
         '7-15(22)18(20)24-8-11-3-2-6-23-16(11)10-24;'
         '/h7,9,11-12,16,23H,2-6,8,10H2,1H3,(H,27,28);1H/t11-,16+;/m0./s1')
     expected_line = first_inchi + '\t' + second_inchi + '\n'

     inchi_writer = get_serializer('inchi')
     molecules = resolve('Vigamox')

     header = StringIO()
     inchi_writer.write_header(header)
     header.seek(0)
     self.assertEqual(header.read(), 'Name:\tInChI:\n')

     plain = inchi_writer.serialize_line(molecules)
     self.assertEqual(plain, expected_line)

     # Without a name the human-readable line matches the plain one.
     human = inchi_writer.serialize_line(molecules, human=True)
     self.assertEqual(human, expected_line)

     named = inchi_writer.serialize_line(molecules, human=True, name='Vigamox')
     self.assertEqual(named, 'Vigamox\t' + expected_line)
예제 #4
0
def main():
    """Map target identifiers read line by line to molecules and serialize them.

    Only the first whitespace-separated field of each input line is used; it
    is split on commas into identifiers. Every identifier for which
    ``resolve_target`` returns something truthy is kept and passed to
    ``targets_to_mols``. Input and output default to stdin and stdout when
    ``options.input`` / ``options.output`` are not set.
    """
    options = get_options()

    with open(options.input) if options.input else sys.stdin as in_f, \
            open(options.output, 'w') if options.output else sys.stdout as out_f:

        serializer_cls = get_serializer(options.dest_format)
        if not serializer_cls:
            # write() accepts exactly one string; the original two-argument
            # call raised TypeError instead of printing the error.
            sys.stderr.write(
                'Unsupported format: {0}\n'.format(options.dest_format))
            return

        if options.human:
            serializer_cls.write_header(out_f)

        for line in in_f:
            fields = line.split()
            if not fields:
                # A blank line has no first field; indexing it would raise
                # IndexError and abort the whole run.
                continue
            identifiers = fields[0].split(',')
            valid_identifiers = list()
            for identifier in identifiers:
                target = resolve_target(identifier)
                if not target:
                    continue
                valid_identifiers.append(target)
            mols = targets_to_mols(valid_identifiers,
                                   only_ids=(options.dest_format == 'chembl_id'),
                                   include_parents=options.parent,
                                   chunk_size=int(options.chunk))
            out_f.write(serializer_cls.serialize_line(
                mols, human=options.human, name=','.join(valid_identifiers)))
def main():
    """Resolve every non-blank input line and write one serialized line per name.

    Names are read from ``options.input`` when it is set, otherwise from
    stdin; results go to ``options.output`` when it is set, otherwise to
    stdout. When ``get_serializer`` returns nothing for the requested format
    the problem is reported on stderr and the function returns early.
    """
    options = get_options()
    with open(options.input) if options.input else sys.stdin as in_f, \
            open(options.output, 'w') if options.output else sys.stdout as out_f:

        serializer_cls = get_serializer(options.format)
        if not serializer_cls:
            # write() accepts exactly one string; passing the format as a
            # second argument raised TypeError instead of printing the error.
            sys.stderr.write(
                'Unsupported format: {0}\n'.format(options.format))
            return

        if options.human:
            serializer_cls.write_header(out_f)

        for line in in_f:
            name = line.strip()
            if not name:
                continue
            try:
                resolved = resolve(name, options.single)
            except Exception:
                # Best effort: a name that cannot be resolved is passed on as
                # None so the remaining input is still processed.
                resolved = None
            if options.parent:
                resolved = get_parents(resolved)
            out_f.write(serializer_cls.serialize_line(resolved, human=options.human, name=name))
    def test_targets_to_mols(self):
        # Table of cases: (target names, output format, separator used to
        # split the serialised line, molecules expected among the results).
        # A separator of None splits on any whitespace.
        cases = [
            (['POLK'], 'chembl_id', ',', {
                'CHEMBL6', 'CHEMBL25', 'CHEMBL10', 'CHEMBL30',
                'CHEMBL50', 'CHEMBL71', 'CHEMBL28', 'CHEMBL66',
            }),
            (['HERG'], 'smiles', None, {
                'CN1C(=O)N(CC(=O)c2ccccc2)C(=O)c3c1nc(N4CCC[C@H](N)C4)n3CC=C(C)C',
                'C[C@]12CCC(=O)C=C1CC[C@@H]3[C@@H]2CC[C@@]4(C)[C@H]3CC[C@@]4(O)C#C',
                'CS(=O)(=O)N1CCC(CN([C@@H]2CC[C@@]3(CC3C2)c4cccc(c4)C#N)C(=O)Nc5ccc(F)c(F)c5)CC1',
                'O=S(=O)(NCCCN1CCN(CC1)c2nsc3ccccc23)c4cccc5scnc45',
                'Cl.CC(C)n1nc(C(=O)NCC2CCN(CCNS(=O)(=O)C)CC2)c3ccccc13',
                'CC1CN(CC(=O)N[C@@H]2C3CC4CC2C[C@@](C4)(C3)C(=O)N)S(=O)(=O)N(C1)c5c(Cl)cc(Cl)cc5Cl',
                'Oc1cccc2CC(CN3CCC4(CC3)CCc5ccccc45)NCc12',
                '[O-][N+](=O)c1ccc(cc1)c2nc(c3ccc(F)cc3)c([nH]2)c4ccncc4',
            }),
            (['CMKAR1'], 'inchi', '\t', {
                'InChI=1S/C7H6O2/c8-7(9)6-4-2-1-3-5-6/h1-5H,(H,8,9)',
                'InChI=1S/C11H7NS/c13-8-12-11-7-3-5-9-4-1-2-6-10(9)11/h1-7H',
                'InChI=1S/CHCl3/c2-1(3)4/h1H',
                'InChI=1S/H4N2/c1-2/h1-2H2',
                'InChI=1S/C8H8N4/c9-11-8-7-4-2-1-3-6(7)5-10-12-8/h1-5H,9H2,(H,11,12)',
                'InChI=1S/CH4N2O2/c2-1(4)3-5/h5H,(H3,2,3,4)',
                'InChI=1S/CH3O5P/c2-1(3)7(4,5)6/h(H,2,3)(H2,4,5,6)',
                'InChI=1S/C4H11NO3/c5-4(1-6,2-7)3-8/h6-8H,1-3,5H2',
                'InChI=1S/C9H9Cl2N3O/c10-6-2-1-3-7(11)5(6)4-8(15)14-9(12)13/h1-3H,4H2,(H4,12,13,14,15)',
            }),
            (['ADRA2L1', 'VIPR1'], 'inchi_key', ',', {
                'OELFLUMRDSZNSF-BRWVUGGUSA-N',
                'KJPRLNWUNMBNBZ-QPJJXVBHSA-N',
                'VEPKQEUBKLEPRA-UHFFFAOYSA-N',
                'ACGUYXCXAPNIKK-UHFFFAOYSA-N',
                'MVGSNCBCUWPVDA-MFOYZWKCSA-N',
                'JZHXLYHEWUWHQL-LVYIWIAJSA-N',
                'CUVBGWMAORETGV-UHFFFAOYSA-N',
                'DVSMVUMYJDOPJQ-UHFFFAOYSA-N',
            }),
        ]

        for target_names, fmt, separator, expected in cases:
            # The first resolve_target() hit is used for every name.
            target_ids = [resolve_target(name)[0]['target_chembl_id'] for name in target_names]
            molecules = targets_to_mols(target_ids)
            writer = get_serializer(fmt)
            found = set(writer.serialize_line(molecules).strip().split(separator))
            self.assertTrue(expected.issubset(found))
    def test_sdf_serialiser(self):
        # The 'sdf' and 'mol' formats are checked identically: no header is
        # written and every serialize_line() variant must equal the `sdf`
        # fixture.
        for fmt in ('sdf', 'mol'):
            writer = get_serializer(fmt)
            molecules = resolve('Vigamox')

            header = StringIO()
            writer.write_header(header)
            header.seek(0)
            self.assertEqual(header.read(), '')

            rendered = writer.serialize_line(molecules)
            # The second rendering is used as the failure message.
            failure_message = writer.serialize_line(molecules)
            self.assertEqual(rendered, sdf, failure_message)

            human = writer.serialize_line(molecules, human=True)
            self.assertEqual(human, sdf)

            named = writer.serialize_line(molecules, human=True, name='Vigamox')
            self.assertEqual(named, sdf)
 def test_chembl_id_serialiser(self):
     # The test expects a 'Name:'/'ChEMBL ID:' header and a comma-separated
     # line of ChEMBL IDs, prefixed by the name only when one is passed.
     id_writer = get_serializer('chembl_id')
     molecules = resolve('Vigamox')

     header = StringIO()
     id_writer.write_header(header)
     header.seek(0)
     header_text = header.read()
     self.assertEqual(header_text, 'Name:\tChEMBL ID:\n')

     expected_ids = 'CHEMBL32,CHEMBL1200735\n'
     checks = [
         ({}, expected_ids),
         ({'human': True}, expected_ids),
         ({'human': True, 'name': 'Vigamox'}, 'Vigamox\tCHEMBL32,CHEMBL1200735\n'),
     ]
     for options, expected in checks:
         self.assertEqual(id_writer.serialize_line(molecules, **options), expected)
 def test_chembl_key_serialiser(self):
     # The test expects a 'Name:'/'InChI Key:' header and a comma-separated
     # line of InChI keys, prefixed by the name only when one is passed.
     key_writer = get_serializer('inchi_key')
     molecules = resolve('Vigamox')

     header = StringIO()
     key_writer.write_header(header)
     header.seek(0)
     header_text = header.read()
     self.assertEqual(header_text, 'Name:\tInChI Key:\n')

     expected_keys = 'FABPRXSRWADJSP-MEDUHNTESA-N,IDIIJJHBXUESQI-DFIJPDEKSA-N\n'
     checks = [
         ({}, expected_keys),
         ({'human': True}, expected_keys),
         ({'human': True, 'name': 'Vigamox'},
          'Vigamox\tFABPRXSRWADJSP-MEDUHNTESA-N,IDIIJJHBXUESQI-DFIJPDEKSA-N\n'),
     ]
     for options, expected in checks:
         self.assertEqual(key_writer.serialize_line(molecules, **options), expected)
예제 #10
0
    def test_mols_to_targets(self):
        # Checks the targets returned for several molecule lists, comparing
        # the comma-separated serialised output as sets.

        def serialised_set(targets, fmt):
            # Serialise with the requested format and split into a set.
            writer = get_serializer(fmt)
            return set(writer.serialize_line(targets).strip().split(','))

        targets = mols_to_targets(['CHEMBL819'], organism='Escherichia coli')
        self.assertEqual(serialised_set(targets, 'uniprot'), {'A1E3K9', 'P62593', 'P35695'})

        targets = mols_to_targets(['CHEMBL819'], organism='Escherichia coli', include_parents=True)
        self.assertEqual(serialised_set(targets, 'uniprot'),
                         {'A1E3K9', 'P62593', 'P35695', 'P00811'})

        targets = mols_to_targets(['CHEMBL25', 'CHEMBL1'])
        expected_genes = {
            'POLK', 'ADRA2RL2', 'CCR2', 'CHRM5', 'ADRA2L1',
            'CMKAR1', 'PTPRC', 'HADH2', 'ITGAB', 'VIPR1',
        }
        self.assertTrue(expected_genes.issubset(serialised_set(targets, 'gene_name')))

        expected_target_ids = {
            'CHEMBL213', 'CHEMBL1916', 'CHEMBL205',
            'CHEMBL4071', 'CHEMBL1909043', 'CHEMBL2622',
        }

        targets = mols_to_targets(['CHEMBL2', 'CHEMBL1737'])
        targets_a = serialised_set(targets, 'chembl_id')
        self.assertTrue(expected_target_ids.issubset(targets_a))

        targets = mols_to_targets(['CHEMBL2', 'CHEMBL1737'], only_ids=True)
        targets_b = serialised_set(targets, 'chembl_id')
        self.assertTrue(expected_target_ids.issubset(targets_b))

        # Both call styles must produce the same serialised IDs.
        self.assertEqual(targets_a, targets_b)

        molecule_ids = []
        for drug in ['Viagra', 'Gleevec']:
            # The first resolve() hit is used for each drug name.
            molecule_ids.append(resolve(drug)[0]['molecule_chembl_id'])

        targets = mols_to_targets(molecule_ids)
        expected_accessions = {
            'Q9BQI3', 'P00523', 'O15111', 'P22612', 'P25021',
            'P16591', 'Q13153', 'Q8WXR4', 'P28564',
        }
        self.assertTrue(expected_accessions.issubset(serialised_set(targets, 'uniprot')))
예제 #11
0
def main():
    """Run a similarity search for every identifier read from the input.

    ``options.threshold`` must be an integer between 70 and 100 inclusive.
    Only the first whitespace-separated field of each input line is used; it
    is split on commas, and each piece matching ``chembl_id_regex`` or
    ``smiles_regex`` is searched with ``new_client.similarity``. Hits are
    de-duplicated on ``molecule_chembl_id`` before being serialized. Blank
    lines and lines starting with 'smiles' (any case) are skipped.
    """
    similarity = new_client.similarity
    options = get_options()

    try:
        threshold = int(options.threshold)
    except (TypeError, ValueError):
        # Only conversion failures mean "not an integer"; a bare except would
        # also swallow KeyboardInterrupt and SystemExit.
        sys.stderr.write('Threshold should be an integer in range [70-100]')
        return
    if threshold not in range(70, 101):
        sys.stderr.write('Threshold should be an integer in range [70-100]')
        return
    threshold = str(threshold)

    source_format = options.source_format.lower()
    if source_format not in AVAILABLE_SOURCE_FORMATS:
        # write() accepts exactly one string; the original two-argument call
        # raised TypeError instead of printing the error.
        sys.stderr.write(
            'Unsupported source format: {0}\n'.format(options.source_format))
        return

    inp = sys.stdin
    if source_format == 'sdf':
        with open(options.input) if options.input else sys.stdin as in_f:
            # The converted data replaces the input file for the main loop.
            # NOTE(review): the result of convert_to_smiles is later used in
            # a `with` statement - confirm it supports that.
            options.input = None
            inp = convert_to_smiles(in_f)

    with open(options.input) if options.input else inp as in_f, \
            open(options.output, 'w') if options.output else sys.stdout as out_f:

        serializer_cls = get_serializer(options.dest_format)
        if not serializer_cls:
            sys.stderr.write(
                'Unsupported format: {0}\n'.format(options.dest_format))
            return

        if options.human:
            serializer_cls.write_header(out_f)

        for line in in_f:
            fields = line.split()
            # Lines read from a file keep their newline, so `not line` never
            # caught blank lines and indexing [0] raised IndexError.
            if not fields or line.lower().startswith('smiles'):
                continue
            identifiers = fields[0].split(',')
            valid_identifiers = list()
            sim = list()
            for identifier in identifiers:
                if chembl_id_regex.match(identifier):
                    valid_identifiers.append(identifier)
                    sim.extend(list(similarity.filter(chembl_id=identifier, similarity=threshold)))
                elif smiles_regex.match(identifier):
                    valid_identifiers.append(identifier)
                    sim.extend(list(similarity.filter(smiles=identifier, similarity=threshold)))
            # Keep one hit per molecule_chembl_id (the last one seen wins).
            sim = list({v['molecule_chembl_id']: v for v in sim}.values())
            out_f.write(serializer_cls.serialize_line(sim, human=options.human, name=','.join(valid_identifiers)))
예제 #12
0
 def test_chembl_id_serialiser(self):
     # Header text first, then the three serialize_line() call styles for
     # the molecules resolved from 'Vigamox'.
     writer = get_serializer('chembl_id')
     resolved = resolve('Vigamox')

     stream = StringIO()
     writer.write_header(stream)
     stream.seek(0)
     written_header = stream.read()
     self.assertEqual(written_header, 'Name:\tChEMBL ID:\n')

     without_name = 'CHEMBL32,CHEMBL1200735\n'
     with_name = 'Vigamox\tCHEMBL32,CHEMBL1200735\n'

     plain_line = writer.serialize_line(resolved)
     self.assertEqual(plain_line, without_name)

     human_line = writer.serialize_line(resolved, human=True)
     self.assertEqual(human_line, without_name)

     named_line = writer.serialize_line(resolved, human=True, name='Vigamox')
     self.assertEqual(named_line, with_name)
예제 #13
0
    def test_sdf_serialiser(self):
        # Both the 'sdf' and the 'mol' format must write an empty header and
        # render 'Vigamox' exactly as the `sdf` fixture, whatever the flags.

        def check_format(fmt):
            writer = get_serializer(fmt)
            resolved = resolve('Vigamox')

            stream = StringIO()
            writer.write_header(stream)
            stream.seek(0)
            self.assertEqual(stream.read(), '')

            first_render = writer.serialize_line(resolved)
            # A second rendering doubles as the failure message.
            message = writer.serialize_line(resolved)
            self.assertEqual(first_render, sdf, message)

            self.assertEqual(writer.serialize_line(resolved, human=True), sdf)

            with_name = writer.serialize_line(
                resolved, human=True, name='Vigamox')
            self.assertEqual(with_name, sdf)

        check_format('sdf')
        check_format('mol')
예제 #14
0
 def test_smiles_serialiser(self):
     # The test expects the 'smi' serializer to write a 'SMILES Name' header
     # and one line per molecule, with the name appended after a space when
     # one is passed.
     smiles_writer = get_serializer('smi')
     molecules = resolve('Vigamox')

     header = StringIO()
     smiles_writer.write_header(header)
     header.seek(0)
     self.assertEqual(header.read(), 'SMILES Name\n')

     smiles = [
         'COc1c(N2C[C@@H]3CCCN[C@@H]3C2)c(F)cc4C(=O)C(=CN(C5CC5)c14)C(=O)O',
         'Cl.COc1c(N2C[C@@H]3CCCN[C@@H]3C2)c(F)cc4C(=O)C(=CN(C5CC5)c14)C(=O)O',
     ]
     expected_plain = ''.join(entry + '\n' for entry in smiles)
     expected_named = ''.join(entry + ' Vigamox\n' for entry in smiles)

     plain = smiles_writer.serialize_line(molecules)
     self.assertEqual(plain, expected_plain)

     human = smiles_writer.serialize_line(molecules, human=True)
     self.assertEqual(human, expected_plain)

     named = smiles_writer.serialize_line(molecules, human=True, name='Vigamox')
     self.assertEqual(named, expected_named)
예제 #15
0
 def test_chembl_key_serialiser(self):
     # Header text first, then the three serialize_line() call styles for
     # the molecules resolved from 'Vigamox'.
     writer = get_serializer('inchi_key')
     resolved = resolve('Vigamox')

     stream = StringIO()
     writer.write_header(stream)
     stream.seek(0)
     written_header = stream.read()
     self.assertEqual(written_header, 'Name:\tInChI Key:\n')

     without_name = 'FABPRXSRWADJSP-MEDUHNTESA-N,IDIIJJHBXUESQI-DFIJPDEKSA-N\n'
     with_name = 'Vigamox\tFABPRXSRWADJSP-MEDUHNTESA-N,IDIIJJHBXUESQI-DFIJPDEKSA-N\n'

     plain_line = writer.serialize_line(resolved)
     self.assertEqual(plain_line, without_name)

     human_line = writer.serialize_line(resolved, human=True)
     self.assertEqual(human_line, without_name)

     named_line = writer.serialize_line(resolved, human=True, name='Vigamox')
     self.assertEqual(named_line, with_name)
def main():
    """Run a substructure search for every identifier read from the input.

    Only the first whitespace-separated field of each input line is used; it
    is split on commas, and each piece matching ``chembl_id_regex`` or
    ``smiles_regex`` is searched with ``new_client.substructure``. Hits are
    de-duplicated on ``molecule_chembl_id`` before being serialized. Blank
    lines and lines starting with 'smiles' (any case) are skipped.
    """
    substructure = new_client.substructure
    options = get_options()

    source_format = options.source_format.lower()
    if source_format not in AVAILABLE_SOURCE_FORMATS:
        # write() accepts exactly one string; the original two-argument call
        # raised TypeError instead of printing the error.
        sys.stderr.write(
            'Unsupported source format: {0}\n'.format(options.source_format))
        return

    inp = sys.stdin
    if source_format == 'sdf':
        with open(options.input) if options.input else sys.stdin as in_f:
            # The converted data replaces the input file for the main loop.
            # NOTE(review): the result of convert_to_smiles is later used in
            # a `with` statement - confirm it supports that.
            options.input = None
            inp = convert_to_smiles(in_f)

    with open(options.input) if options.input else inp as in_f, \
            open(options.output, 'w') if options.output else sys.stdout as out_f:

        serializer_cls = get_serializer(options.dest_format)
        if not serializer_cls:
            sys.stderr.write(
                'Unsupported format: {0}\n'.format(options.dest_format))
            return

        if options.human:
            serializer_cls.write_header(out_f)

        for line in in_f:
            fields = line.split()
            # Lines read from a file keep their newline, so `not line` never
            # caught blank lines and indexing [0] raised IndexError.
            if not fields or line.lower().startswith('smiles'):
                continue
            identifiers = fields[0].split(',')
            valid_identifiers = list()
            sub = list()
            for identifier in identifiers:
                if chembl_id_regex.match(identifier):
                    valid_identifiers.append(identifier)
                    sub.extend(list(substructure.filter(chembl_id=identifier)))
                elif smiles_regex.match(identifier):
                    valid_identifiers.append(identifier)
                    sub.extend(list(substructure.filter(smiles=identifier)))
            # Keep one hit per molecule_chembl_id (the last one seen wins).
            sub = list({v['molecule_chembl_id']: v for v in sub}.values())
            out_f.write(
                serializer_cls.serialize_line(
                    sub, human=options.human,
                    name=','.join(valid_identifiers)))
예제 #17
0
def main():
    """Run a substructure search for every identifier read from the input.

    Only the first whitespace-separated field of each input line is used; it
    is split on commas, and each piece matching ``chembl_id_regex`` or
    ``smiles_regex`` is searched with ``new_client.substructure``. Hits are
    de-duplicated on ``molecule_chembl_id`` before being serialized. Blank
    lines and lines starting with 'smiles' (any case) are skipped.
    """
    substructure = new_client.substructure
    options = get_options()

    source_format = options.source_format.lower()
    if source_format not in AVAILABLE_SOURCE_FORMATS:
        # write() accepts exactly one string; the original two-argument call
        # raised TypeError instead of printing the error.
        sys.stderr.write(
            'Unsupported source format: {0}\n'.format(options.source_format))
        return

    inp = sys.stdin
    if source_format == 'sdf':
        with open(options.input) if options.input else sys.stdin as in_f:
            # The converted data replaces the input file for the main loop.
            # NOTE(review): the result of convert_to_smiles is later used in
            # a `with` statement - confirm it supports that.
            options.input = None
            inp = convert_to_smiles(in_f)

    with open(options.input) if options.input else inp as in_f, \
            open(options.output, 'w') if options.output else sys.stdout as out_f:

        serializer_cls = get_serializer(options.dest_format)
        if not serializer_cls:
            sys.stderr.write(
                'Unsupported format: {0}\n'.format(options.dest_format))
            return

        if options.human:
            serializer_cls.write_header(out_f)

        for line in in_f:
            fields = line.split()
            # Lines read from a file keep their newline, so `not line` never
            # caught blank lines and indexing [0] raised IndexError.
            if not fields or line.lower().startswith('smiles'):
                continue
            identifiers = fields[0].split(',')
            valid_identifiers = list()
            sub = list()
            for identifier in identifiers:
                if chembl_id_regex.match(identifier):
                    valid_identifiers.append(identifier)
                    sub.extend(list(substructure.filter(chembl_id=identifier)))
                elif smiles_regex.match(identifier):
                    valid_identifiers.append(identifier)
                    sub.extend(list(substructure.filter(smiles=identifier)))
            # Keep one hit per molecule_chembl_id (the last one seen wins).
            sub = list({v['molecule_chembl_id']: v for v in sub}.values())
            out_f.write(serializer_cls.serialize_line(sub, human=options.human, name=','.join(valid_identifiers)))
예제 #18
0
def main():
    """Map molecule identifiers read line by line to targets and serialize them.

    Only the first whitespace-separated field of each input line is used; it
    is split on commas. Pieces matching ``chembl_id_regex`` are used as they
    are, and pieces matching ``smiles_regex`` are replaced by the
    ``molecule_chembl_id`` of every ``resolve`` hit. The collected IDs are
    passed to ``mols_to_targets``. Blank lines and lines starting with
    'smiles' (any case) are skipped.
    """
    options = get_options()

    source_format = options.source_format.lower()
    if source_format not in AVAILABLE_SOURCE_FORMATS:
        # write() accepts exactly one string; the original two-argument call
        # raised TypeError instead of printing the error.
        sys.stderr.write(
            'Unsupported source format: {0}\n'.format(options.source_format))
        return

    inp = sys.stdin
    if source_format == 'sdf':
        with open(options.input) if options.input else sys.stdin as in_f:
            # The converted data replaces the input file for the main loop.
            # NOTE(review): the result of convert_to_smiles is later used in
            # a `with` statement - confirm it supports that.
            options.input = None
            inp = convert_to_smiles(in_f)

    with open(options.input) if options.input else inp as in_f, \
            open(options.output, 'w') if options.output else sys.stdout as out_f:

        serializer_cls = get_serializer(options.dest_format)
        if not serializer_cls:
            sys.stderr.write(
                'Unsupported format: {0}\n'.format(options.dest_format))
            return

        if options.human:
            serializer_cls.write_header(out_f)

        for line in in_f:
            fields = line.split()
            # Lines read from a file keep their newline, so `not line` never
            # caught blank lines and indexing [0] raised IndexError.
            if not fields or line.lower().startswith('smiles'):
                continue
            identifiers = fields[0].split(',')
            valid_identifiers = list()
            for identifier in identifiers:
                if chembl_id_regex.match(identifier):
                    valid_identifiers.append(identifier)
                elif smiles_regex.match(identifier):
                    valid_identifiers.extend([x['molecule_chembl_id'] for x in resolve(identifier)])
            targets = mols_to_targets(valid_identifiers,
                                      organism=options.organism,
                                      only_ids=(options.dest_format == 'chembl_id'),
                                      include_parents=options.parent,
                                      chunk_size=int(options.chunk))
            out_f.write(serializer_cls.serialize_line(targets, human=options.human, name=','.join(valid_identifiers)))
예제 #19
0
 def test_inchi_serialiser(self):
     # Header text first, then the three serialize_line() call styles for
     # the molecules resolved from 'Vigamox'; the two InChI strings are
     # expected on one line, separated by a tab.
     writer = get_serializer('inchi')
     resolved = resolve('Vigamox')

     stream = StringIO()
     writer.write_header(stream)
     stream.seek(0)
     written_header = stream.read()
     self.assertEqual(written_header, 'Name:\tInChI:\n')

     without_name = (
         'InChI=1S/C21H24FN3O4/c1-29-20-17-13(19(26)14(21(27)28)9-25(17)12-4-5-12)7-15(22)18(20)'
         '24-8-11-3-2-6-23-16(11)10-24/h7,9,11-12,16,23H,2-6,8,10H2,1H3,(H,27,28)/t11-,16+/m0/s1'
         '\t'
         'InChI=1S/C21H24FN3O4.ClH/c1-29-20-17-13(19(26)14(21(27)28)9-25(17)12-4-5-12)7-15(22)18(20)'
         '24-8-11-3-2-6-23-16(11)10-24;/h7,9,11-12,16,23H,2-6,8,10H2,1H3,(H,27,28);1H/t11-,16+;/m0./s1'
         '\n')

     checks = [
         ({}, without_name),
         ({'human': True}, without_name),
         ({'human': True, 'name': 'Vigamox'}, 'Vigamox\t' + without_name),
     ]
     for options, expected in checks:
         self.assertEqual(writer.serialize_line(resolved, **options), expected)
예제 #20
0
 def test_smiles_serialiser(self):
     # Header text first, then the three serialize_line() call styles for
     # the molecules resolved from 'Vigamox'; one SMILES per output line.
     writer = get_serializer('smi')
     resolved = resolve('Vigamox')

     stream = StringIO()
     writer.write_header(stream)
     stream.seek(0)
     written_header = stream.read()
     self.assertEqual(written_header, 'SMILES Name\n')

     without_name = (
         'COc1c(N2C[C@@H]3CCCN[C@@H]3C2)c(F)cc4C(=O)C(=CN(C5CC5)c14)C(=O)O\n'
         'Cl.COc1c(N2C[C@@H]3CCCN[C@@H]3C2)c(F)cc4C(=O)C(=CN(C5CC5)c14)C(=O)O\n')
     with_name = (
         'COc1c(N2C[C@@H]3CCCN[C@@H]3C2)c(F)cc4C(=O)C(=CN(C5CC5)c14)C(=O)O Vigamox\n'
         'Cl.COc1c(N2C[C@@H]3CCCN[C@@H]3C2)c(F)cc4C(=O)C(=CN(C5CC5)c14)C(=O)O Vigamox\n')

     checks = [
         ({}, without_name),
         ({'human': True}, without_name),
         ({'human': True, 'name': 'Vigamox'}, with_name),
     ]
     for options, expected in checks:
         self.assertEqual(writer.serialize_line(resolved, **options), expected)
예제 #21
0
    def test_mols_to_targets(self):
        # Checks the targets returned for several molecule lists; every
        # serialised line is split on commas and compared as a set.
        e_coli = 'Escherichia coli'

        found = mols_to_targets(['CHEMBL819'], organism=e_coli)
        writer = get_serializer('uniprot')
        accessions = set(writer.serialize_line(found).strip().split(','))
        self.assertEqual(accessions, {'A1E3K9', 'P62593', 'P35695'})

        found = mols_to_targets(
            ['CHEMBL819'], organism=e_coli, include_parents=True)
        writer = get_serializer('uniprot')
        accessions = set(writer.serialize_line(found).strip().split(','))
        self.assertEqual(accessions, {'A1E3K9', 'P62593', 'P35695', 'P00811'})

        found = mols_to_targets(['CHEMBL25', 'CHEMBL1'])
        writer = get_serializer('gene_name')
        gene_names = set(writer.serialize_line(found).strip().split(','))
        wanted_genes = {
            'POLK', 'ADRA2RL2', 'CCR2', 'CHRM5', 'ADRA2L1',
            'CMKAR1', 'PTPRC', 'HADH2', 'ITGAB', 'VIPR1',
        }
        self.assertTrue(wanted_genes <= gene_names)

        wanted_targets = {
            'CHEMBL213', 'CHEMBL1916', 'CHEMBL205',
            'CHEMBL4071', 'CHEMBL1909043', 'CHEMBL2622',
        }

        found = mols_to_targets(['CHEMBL2', 'CHEMBL1737'])
        writer = get_serializer('chembl_id')
        targets_a = set(writer.serialize_line(found).strip().split(','))
        self.assertTrue(wanted_targets <= targets_a)

        found = mols_to_targets(['CHEMBL2', 'CHEMBL1737'], only_ids=True)
        writer = get_serializer('chembl_id')
        targets_b = set(writer.serialize_line(found).strip().split(','))
        self.assertTrue(wanted_targets <= targets_b)

        # Both call styles must produce the same serialised IDs.
        self.assertEqual(targets_a, targets_b)

        # The first resolve() hit is used for each drug name.
        molecule_ids = [
            resolve(drug)[0]['molecule_chembl_id']
            for drug in ['Viagra', 'Gleevec']
        ]
        found = mols_to_targets(molecule_ids)
        writer = get_serializer('uniprot')
        accessions = set(writer.serialize_line(found).strip().split(','))
        wanted_accessions = {
            'Q9BQI3', 'P00523', 'O15111', 'P22612', 'P25021',
            'P16591', 'Q13153', 'Q8WXR4', 'P28564',
        }
        self.assertTrue(wanted_accessions <= accessions)
def main():
    """Run a similarity search for every identifier read from the input.

    ``options.threshold`` must be an integer between 70 and 100 inclusive.
    Only the first whitespace-separated field of each input line is used; it
    is split on commas, and each piece matching ``chembl_id_regex`` or
    ``smiles_regex`` is searched with ``new_client.similarity``. Hits are
    de-duplicated on ``molecule_chembl_id`` before being serialized. Blank
    lines and lines starting with 'smiles' (any case) are skipped.
    """
    similarity = new_client.similarity
    options = get_options()

    try:
        threshold = int(options.threshold)
    except (TypeError, ValueError):
        # Only conversion failures mean "not an integer"; a bare except would
        # also swallow KeyboardInterrupt and SystemExit.
        sys.stderr.write('Threshold should be an integer in range [70-100]')
        return
    if threshold not in range(70, 101):
        sys.stderr.write(
            'Threshold should be an integer in range [70-100]')
        return
    threshold = str(threshold)

    source_format = options.source_format.lower()
    if source_format not in AVAILABLE_SOURCE_FORMATS:
        # write() accepts exactly one string; the original two-argument call
        # raised TypeError instead of printing the error.
        sys.stderr.write(
            'Unsupported source format: {0}\n'.format(options.source_format))
        return

    inp = sys.stdin
    if source_format == 'sdf':
        with open(options.input) if options.input else sys.stdin as in_f:
            # The converted data replaces the input file for the main loop.
            # NOTE(review): the result of convert_to_smiles is later used in
            # a `with` statement - confirm it supports that.
            options.input = None
            inp = convert_to_smiles(in_f)

    with open(options.input) if options.input else inp as in_f, \
            open(options.output, 'w') if options.output else sys.stdout as out_f:

        serializer_cls = get_serializer(options.dest_format)
        if not serializer_cls:
            sys.stderr.write(
                'Unsupported format: {0}\n'.format(options.dest_format))
            return

        if options.human:
            serializer_cls.write_header(out_f)

        for line in in_f:
            fields = line.split()
            # Lines read from a file keep their newline, so `not line` never
            # caught blank lines and indexing [0] raised IndexError.
            if not fields or line.lower().startswith('smiles'):
                continue
            identifiers = fields[0].split(',')
            valid_identifiers = list()
            sim = list()
            for identifier in identifiers:
                if chembl_id_regex.match(identifier):
                    valid_identifiers.append(identifier)
                    sim.extend(
                        list(
                            similarity.filter(chembl_id=identifier,
                                              similarity=threshold)))
                elif smiles_regex.match(identifier):
                    valid_identifiers.append(identifier)
                    sim.extend(
                        list(
                            similarity.filter(smiles=identifier,
                                              similarity=threshold)))
            # Keep one hit per molecule_chembl_id (the last one seen wins).
            sim = list({v['molecule_chembl_id']: v for v in sim}.values())
            out_f.write(
                serializer_cls.serialize_line(
                    sim, human=options.human,
                    name=','.join(valid_identifiers)))