else:
            can_dupes2[cas].append(data)

    for cas in can_dupes2.keys():
        groups = can_dupes2[cas]
        if len(groups) > 1:
           print 'compound with cas=%s is considered different by OB canonical smiles'%cas
           for group in groups:
                print group[0].write('can').strip()
           print '-'*80

    union = sorted([dupe[0] for dupe in cas_dupes.values()], key=lambda mol: mol.title)
    save_mols(union, op.join(root, 'mutagenicity-all-cas-union.sdf'))
    print '\t\tUnion size=%d' % len(union)

    dest_sdf = op.join(root, 'mutagenicity-all-cas-union-prepared.sdf')
    prepare_dataset(op.join(root, 'mutagenicity-all-cas-union.sdf'), rename=False, conformations=True)

    #Depict the molecules
    depict(dest_sdf)

    #Molecular descriptors
    print 'Computing fingerprints via JCompoundMapper' #TODO: Extract-method this
    jcm_fingerprint(dest_sdf, ('ECFP', 'ECFPVariant', 'PHAP3POINT2D', 'SHED', 'DFS', 'RAD2D'))
    jcm_fingerprint(dest_sdf, ('LSTAR', 'RAD3D', 'PHAP3POINT3D'))
    print 'Computing descriptors via CDKDescUI'
    cdkdescuiprops(dest_sdf, desc_types=('constitutional',))
    print 'Computing spectrophores'
    spectrophores(dest_sdf)
    print 'Saving in several data analysis tools file formats'
    prop4da(dest_sdf)
Beispiel #2
0
        print '\tCreating \"master\" table: %s' % master_table
        create_master_table(dest_sdf, master_table, fields=['Activity'])

        print '\tCreating \"saliviewer\" table: %s' % sali_table
        create_saliviewer_input(master_table, sali_table)

    return dest_sdf, master_table

if __name__ == '__main__':
    # Script entry point: prepare the tox_benchmark_N6512 SDF found under the
    # default data directory, depict the prepared molecules and then run the
    # descriptor / fingerprint tools one after another.
    DEFAULT_AMESV2_DIR = op.join(
        op.expanduser('~'), 'Proyectos', 'bsc', 'data', 'filtering', 'mutagenicity')
    root = DEFAULT_AMESV2_DIR

    # TODO: check if it exists, otherwise download
    dataset = op.join(root, 'tox_benchmark_N6512.sdf')
    dest_sdf, master_table = prepare_dataset(dataset)

    # Depict the molecules (uses the prepared file returned above).
    depict(dest_sdf)

    # Molecular descriptors.
    # NOTE(review): everything below is fed the raw `dataset` path rather than
    # the prepared `dest_sdf` -- confirm that this is intended.
    print('Computing descriptors via CDKDescUI')
    cdkdescuiprops(dataset)

    print('Computing spectrophores')
    spectrophores(dataset)

    print('Saving in several data analysis tools file formats')
    prop4da(dataset)

    # TODO: Extract-method this
    print('Computing fingerprints via JCompoundMapper')
    FINGERPRINTS = ('ECFP', 'CATS2D')
    for fp in FINGERPRINTS:
        print(fp)
        # One ARFF output file per fingerprint name.
        output = op.join(root, 'tox_benchmark_N6512-jcm-%s.arff' % fp)
        # NOTE(review): `fp` only ends up in the output file name; it is not
        # passed to fingerprint(), so both runs receive identical arguments
        # apart from the destination -- confirm against the driver's signature.
        JCompoundMapperCLIDriver().fingerprint(dataset, output, label='Activity')