Esempio n. 1
0
def process():
    """Copy an SD file to a new SD file, leaving out excluded molecules.

    Takes three paths from ``sys.argv``: the input file, a file of molecules
    to exclude, and the output file. Each molecule of the exclusion file is
    passed through ``setupMolecule`` and its ``Chem.calcHashCode`` value is
    collected in a set. Input molecules are prepared the same way; those
    whose hash code is in the set are reported on stderr and not written,
    all others go to the output writer.

    A progress line is printed to stderr every 10000 input molecules and a
    short summary once the input is exhausted. When fewer than three
    arguments are given, a usage message is printed and the process exits
    with status 2.
    """
    if len(sys.argv) < 4:
        usage = '[input.sdf] [exclude-molecules.sdf] [output.sdf]'
        print('Usage:', sys.argv[0], usage, file=sys.stderr)
        sys.exit(2)

    in_path, exclude_path, out_path = sys.argv[1:4]

    # Streams are opened in argument order, before any reader/writer exists.
    in_stream = Base.FileIOStream(in_path, 'r')
    exclude_stream = Base.FileIOStream(exclude_path, 'r')
    out_stream = Base.FileIOStream(out_path, 'w')

    in_reader = Chem.SDFMoleculeReader(in_stream)
    exclude_reader = Chem.SDFMoleculeReader(exclude_stream)
    out_writer = Chem.SDFMolecularGraphWriter(out_stream)

    # A single molecule object is reused as the read target for both files.
    molecule = Chem.BasicMolecule()

    Chem.setMultiConfImportParameter(in_reader, False)
    Chem.setMultiConfImportParameter(exclude_reader, False)
    Chem.setMultiConfExportParameter(out_writer, False)

    counts = Stats()
    counts.read = 0
    counts.dropped = 0

    # Pass 1: hash codes of every molecule that must not reach the output.
    excluded = set()

    while exclude_reader.read(molecule):
        setupMolecule(molecule)
        excluded.add(Chem.calcHashCode(molecule))

    # Pass 2: filter the input against the collected hash codes.
    while in_reader.read(molecule):
        setupMolecule(molecule)

        if Chem.calcHashCode(molecule) not in excluded:
            out_writer.write(molecule)
        else:
            counts.dropped += 1
            # counts.read has not been incremented yet, so the reported
            # index is zero-based.
            description = (Chem.generateSMILES(molecule) + ' ' +
                           Chem.getName(molecule))
            print('Dropped Molecule {}: '.format(counts.read) + description,
                  file=sys.stderr)

        counts.read += 1

        if counts.read % 10000 == 0:
            print('Processed {} Molecules...'.format(counts.read),
                  file=sys.stderr)

    summary_lines = (
        '',
        '-- Summary --',
        'Molecules processed: ' + str(counts.read),
        'Molecules dropped: ' + str(counts.dropped),
    )
    for line in summary_lines:
        print(line, file=sys.stderr)
def calculate_molecule_hashcode(mol, stereo=True):
    """Return ``Chem.calcHashCode`` for *mol* after preparing it in place.

    The molecule is first passed to ``Chem.makeHydrogenDeplete`` and
    ``Chem.calcImplicitHydrogenCounts``.

    Args:
        mol: Molecule object handed to the ``Chem`` functions; it is passed
            to several preparation calls before hashing (presumably
            modified in place -- confirm against the CDPL documentation).
        stereo: When true (the default), the atom/bond stereo descriptor and
            CIP priority/configuration calculations are run and the hash
            code is computed with ``Chem.calcHashCode``'s default flags.
            When false, those calculations are skipped and the hash code is
            restricted to the atom flags TYPE, H_COUNT, FORMAL_CHARGE and
            AROMATICITY and the bond flags ORDER, TOPOLOGY and AROMATICITY.

    Returns:
        The value returned by ``Chem.calcHashCode``.
    """
    Chem.makeHydrogenDeplete(mol)
    Chem.calcImplicitHydrogenCounts(mol, True)

    if not stereo:
        # Stereo-insensitive hash: only the listed properties contribute.
        atom_mask = (Chem.AtomPropertyFlag.TYPE
                     | Chem.AtomPropertyFlag.H_COUNT
                     | Chem.AtomPropertyFlag.FORMAL_CHARGE
                     | Chem.AtomPropertyFlag.AROMATICITY)
        bond_mask = (Chem.BondPropertyFlag.ORDER
                     | Chem.BondPropertyFlag.TOPOLOGY
                     | Chem.BondPropertyFlag.AROMATICITY)
        return Chem.calcHashCode(mol,
                                 atom_flags=atom_mask,
                                 bond_flags=bond_mask)

    # Stereo-sensitive hash: compute stereo and CIP data before hashing.
    Chem.calcAtomStereoDescriptors(mol, True)
    Chem.calcBondStereoDescriptors(mol, True)
    Chem.calcCIPPriorities(mol, True)
    Chem.calcAtomCIPConfigurations(mol, True)
    Chem.calcBondCIPConfigurations(mol, True)
    return Chem.calcHashCode(mol)