コード例 #1
0
def std(mol, returnMetals=False):
    """Standardise ``mol`` and index the resulting structures by SMILES.

    Parameters
    ----------
    mol
        Molecule object exposing ``GetNumAtoms`` and ``GetAtoms``.
    returnMetals : bool
        When true, metal ions (atoms whose symbol is in ``_metals``) are
        added to the result as separate ``'[Symbol]'`` entries.

    Returns
    -------
    dict
        Maps a SMILES string to a tuple
        ``(molecule, is_metal, passed, error_message)``.  The dictionary is
        empty when a single-atom, non-metal input does not pass
        standardisation.
    """
    stdD = {}

    # Single-atom input: either a metal ion that is reported as-is, or a
    # lone atom that goes through the standardiser like any other compound.
    if mol.GetNumAtoms() == 1:
        at = mol.GetAtoms()[0].GetSymbol()
        symbol = '[%s]' % at
        if at in _metals and returnMetals:
            stdD[symbol] = (Chem.MolFromSmiles(symbol), True, True, '')
        else:
            (passed, std_cmpd, errmessage) = standardise.run(mol)
            if passed:
                stdD[symbol] = (std_cmpd, False, passed, errmessage)
        return stdD

    # Extract metal ions from complex compounds
    comp_mol, metals = disconnect(mol)
    if returnMetals:
        for metal in metals:
            metalmol = Chem.MolFromSmiles(metal)
            metal_smiles = '[%s]' % metalmol.GetAtoms()[0].GetSymbol()
            stdD[metal_smiles] = (Chem.MolFromSmiles(metal_smiles), True,
                                  True, '')

    # Standardise the rest of the molecule exactly once, inside the guard,
    # so that a standardiser crash is reported as a 'Failed' entry instead
    # of propagating to the caller.
    std_cmpds = None
    try:
        (passed, std_cmpds, errmessage) = standardise.run(comp_mol)
    except Exception:
        passed = False
        errmessage = 'Failed'

    if passed:
        smiles = Chem.MolToSmiles(std_cmpds, isomericSmiles=True)
        stdD[smiles] = (std_cmpds, False, True, '')
    elif errmessage == 'Multiple non-salt/solvate components':
        # In this case the standardiser hands back several components;
        # keep one molecule per distinct InChI.
        cmpdD = {}
        for cmpd in std_cmpds:
            cmpdD[Chem.MolToInchi(cmpd)] = cmpd

        for cmpd in cmpdD.values():
            smiles = Chem.MolToSmiles(cmpd, isomericSmiles=True)
            stdD[smiles] = (cmpd, False, True,
                            'Multiple non-salt/solvate components')
    else:
        # Any other failure: report the original molecule with the error.
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
        stdD[smiles] = (mol, False, False, errmessage)

    return stdD
コード例 #2
0
def process_data(type_dataset, dataset, i):
    """Standardise a collection of SMILES and featurise the ones that pass.

    Parameters
    ----------
    type_dataset : str
        Label used as the prefix of the generated IDs and in the message
        printed when an entry cannot be processed.
    dataset : iterable
        Entries passed one by one to ``standardise.run``.
    i : int
        Offset added to the running counter when numbering the IDs.

    Returns
    -------
    tuple
        ``(ids, fingerprints, smiles, one_hot, next_offset)`` where the first
        three items are built with ``np.bytes_``, ``one_hot`` is a NumPy
        array and ``next_offset`` is ``i`` plus the number of kept entries.
    """
    new_ids = []
    fingerprints = []
    smiles = []
    mol_oh = []

    n_smiles = 0
    for smile in dataset:
        try:
            # presumably standardise.run returns a SMILES string here, since
            # its result is measured with len() and re-parsed below
            mol = standardise.run(smile)
            if len(mol) <= 120:
                fp = AllChem.GetMorganFingerprintAsBitVect(
                    Chem.MolFromSmiles(mol), 2, nBits=1024)
                m_oh = ohf.featurize([mol], 120)
                if str(m_oh) != 'nan':
                    new_ids.append('{}_{}'.format(type_dataset,
                                                  n_smiles + 1 + i))
                    fingerprints.append('[{}]'.format(','.join(
                        str(x) for x in fp)))
                    smiles.append(mol)
                    mol_oh.append(m_oh[0])
                    n_smiles += 1
        except Exception:
            # Best effort: report the offending entry and carry on, but let
            # KeyboardInterrupt / SystemExit propagate.
            print('{} in {}'.format(smile, type_dataset))

    print(len(smiles))

    # np.string_ was an alias of np.bytes_ and was removed in NumPy 2.0.
    return np.bytes_(new_ids), np.bytes_(fingerprints), np.bytes_(
        smiles), np.array(mol_oh), n_smiles + i
コード例 #3
0
def get_rdk_mol(smi, perform_standardisation=False):
    """Parse a SMILES string into a molecule, optionally standardising it.

    :param smi: SMILES string to parse
    :param perform_standardisation: when true, pass the parsed molecule
        through ``standardise.run``; if that raises ``StandardiseException``
        the unstandardised molecule is returned instead
    :return: the parsed (and possibly standardised) molecule
    :raises ValueError: if ``Chem.MolFromSmiles`` returns ``None``
    """
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # ValueError is still an Exception, so existing handlers keep working
        raise ValueError('Could not parse SMILES: %r' % (smi,))
    if perform_standardisation:
        try:
            mol = standardise.run(mol)
        except standardise.StandardiseException:
            # deliberate best effort: fall back to the parsed molecule
            pass
    return mol
コード例 #4
0
def get_canonical_smile(x):
    """
    Run the standardiser on a smile.

    :param x: smile (string)
    :return: the result of ``standardise.run(x)``, or the literal string
        ``'None'`` when that call raises any ``Exception``
    """
    try:
        standardised = standardise.run(x)
    except Exception:
        standardised = 'None'
    return standardised
コード例 #5
0
def calc_descriptors(row,
                     fp_type,
                     fp_radius,
                     con_desc_list,
                     stdrise=True,
                     hashed=False):
    """Compute a Morgan fingerprint and selected descriptors for one row.

    Parameters
    ----------
    row : mapping
        Must contain a ``'smiles'`` entry.
    fp_type : str
        ``'ecfp'`` computes the fingerprint with ``useFeatures=False``; any
        other value uses ``useFeatures=True``.
    fp_radius : int
        Radius passed to the Morgan fingerprint functions.
    con_desc_list : container of str
        Names of the descriptors to compute, among ``'alogp'``, ``'mw'``,
        ``'n_h_atoms'``, ``'rtb'``, ``'hbd'`` and ``'hba'``.
    stdrise : bool
        Pass the molecule through ``standardise.run`` first.
    hashed : bool
        Use a 2048-bit vector and report its on-bits instead of the
        non-zero elements of the unhashed fingerprint.

    Returns
    -------
    tuple
        ``(fps, alogp, mw, n_h_atoms, rtb, hbd, hba)``.  Descriptors that
        were not requested are ``None``.  All seven items are ``None`` when
        the SMILES cannot be parsed or standardisation fails.
    """
    failure = (None, ) * 7

    rdmol = Chem.MolFromSmiles(row['smiles'])
    if not rdmol:
        return failure

    if stdrise:
        try:
            rdmol = standardise.run(rdmol, output_rules_applied=[])
        except Exception:
            return failure

    # The two fingerprint flavours differ only in the useFeatures flag.
    use_features = fp_type != 'ecfp'
    if hashed:
        bit_vect = AllChem.GetMorganFingerprintAsBitVect(
            rdmol, fp_radius, 2048, useFeatures=use_features)
        fps = {key: 1 for key in bit_vect.GetOnBits()}
    else:
        # NB works better with binary features, removing FP feature freq (useCounts=False)
        fps = AllChem.GetMorganFingerprint(
            rdmol, fp_radius, useFeatures=use_features,
            useCounts=False).GetNonzeroElements()

    alogp = Descriptors.MolLogP(
        rdmol) if 'alogp' in con_desc_list else None
    mw = Descriptors.MolWt(rdmol) if 'mw' in con_desc_list else None
    n_h_atoms = Descriptors.HeavyAtomCount(
        rdmol) if 'n_h_atoms' in con_desc_list else None
    rtb = Descriptors.NumRotatableBonds(
        rdmol) if 'rtb' in con_desc_list else None
    hbd = Descriptors.NumHDonors(rdmol) if 'hbd' in con_desc_list else None
    hba = Descriptors.NumHAcceptors(
        rdmol) if 'hba' in con_desc_list else None
    return fps, alogp, mw, n_h_atoms, rtb, hbd, hba
コード例 #6
0
def standardize(mol):
    """
    Wrapper to apply the structure normalization protocol provided by
    Francis Atkinson (EBI) to the mol block of ``mol``.

    Returns a tuple:
        (True, parent)      when standardisation succeeds; also when it
                            fails with the "no_non_salt" error, in which
                            case ``parent`` is the original mol block
        (False, error name) for any other StandardiseException
    """
    try:
        return (True, standardise.run(Chem.MolToMolBlock(mol)))
    except standardise.StandardiseException as err:
        if err.name != "no_non_salt":
            return (False, err.name)
        # nothing but salts in the mixture: hand back the input unchanged
        return (True, Chem.MolToMolBlock(mol))
コード例 #7
0
ファイル: PRF_evaluation.py プロジェクト: BenderGroup/PRF
def preprocessMolecule(inp):
	"""Standardise ``inp`` and keep it only if it passes two filters.

	Returns ``False`` for falsy input.  Returns ``None`` when
	``standardise.run`` raises ``StandardiseException``, when its result is
	falsy, or when either filter below rejects the molecule.  Otherwise
	returns the standardised molecule.
	"""
	if not inp:
		return False

	def has_carbon_in_weight_range(candidate):
		# needs a molecular weight within [100, 1000] and at least one
		# atom with atomic number 6
		weight = Descriptors.MolWt(candidate)
		if not (100 <= weight <= 1000):
			return False
		return any(atom.GetAtomicNum() == 6 for atom in candidate.GetAtoms())

	def only_allowed_elements(candidate):
		# rejects atomic numbers 2, 10, 13, 18, 21-32, 36-52 and 54 upwards
		for atom in candidate.GetAtoms():
			z = atom.GetAtomicNum()
			if z in (2, 10, 13, 18) or 21 <= z <= 32 or 36 <= z <= 52 or z >= 54:
				return False
		return True

	try:
		std_mol = standardise.run(inp)
	except standardise.StandardiseException:
		return None

	if std_mol and only_allowed_elements(std_mol) and has_carbon_in_weight_range(std_mol):
		return std_mol
	return None
コード例 #8
0
def mol_to_standardised_mol(mol, name=None):
    """Standardise a mol with the optional ``standardiser`` package.

    Parameters
    ----------
    mol
        Molecule to standardise.
    name : str, optional
        Name used in log messages.  Defaults to the ``_Name`` property of
        ``mol``, or ``repr(mol)`` if that property is missing.

    Returns
    -------
    The standardised molecule, of the same type (``PropertyMol`` or
    ``rdkit.Chem.Mol``) as the input and carrying its ``_Name`` property
    when it has one.  ``mol`` itself is returned if ``standardiser`` cannot
    be imported.  An unstandardised copy is returned if standardisation
    raises ``StandardiseException``.
    """
    try:
        from standardiser import standardise
        from standardiser.utils import StandardiseException
    except ImportError:
        logging.warning(
            "standardiser module unavailable. Using unstandardised mol.")
        return mol

    if name is None:
        try:
            name = mol.GetProp("_Name")
        except KeyError:
            name = repr(mol)

    if isinstance(mol, PropertyMol):
        mol_type = PropertyMol
        mol = rdkit.Chem.Mol(mol)
    else:
        mol_type = rdkit.Chem.Mol

    # Backwards-compatible with old standardiser releases, which expose
    # ``apply`` instead of ``run``.  The entry point is picked up front so
    # that both go through the same StandardiseException handler, and so an
    # AttributeError raised inside ``run`` is not mistaken for a missing
    # function.
    run_standardiser = getattr(standardise, "run", None)
    if run_standardiser is None:
        run_standardiser = standardise.apply

    logging.debug("Standardising {}".format(name))
    try:
        std_mol = run_standardiser(mol)
    except StandardiseException:
        logging.error(
            ("Standardisation of {} failed. Using unstandardised "
             "mol.".format(name)),
            exc_info=True,
        )
        return mol_type(mol)

    std_mol = mol_type(std_mol)
    try:
        std_mol.SetProp("_Name", mol.GetProp("_Name"))
    except KeyError:
        # the input had no name to carry over
        pass

    return std_mol
コード例 #9
0
            mols_now.append(m)
            ys.append(y_now)
            mol_ids.append(chembl_ids[i])
    activities = ys
    chembl_ids = visited

    from standardiser import standardise
    import logging
    incorrect_mols = []  # to remove those that cannot be standardised
    mols = []
    #standardise.logger.setLevel('DEBUG')
    for i, m in enumerate(mols_now):
        print "Standardizing molecule: ", i
        parent = None
        try:
            parent = standardise.run(m)
            mols.append(parent)
        except standardise.StandardiseException as e:
            logging.warning(e.message)
            incorrect_mols.append(i)

    activities = [
        x for i, x in enumerate(activities) if i not in incorrect_mols
    ]
    chembl_ids = [
        x for i, x in enumerate(chembl_ids) if i not in incorrect_mols
    ]

    #--------------------------------------------------------
    # writing in .sdf format:
    #--------------------------------------------------------
コード例 #10
0
ファイル: idata.py プロジェクト: josecarlosgomezt/flame
    def normalize(self, ifile, method):
        '''
        Generates a simplified SDFile with MolBlock for further processing

        Note that this method is applied to every molecule and that it skips
        mol blocks in the input SDFile not able to generate a valid mol

        Also, when 'standardize' is contained in method, applies chemical
        standardization protocols, like the one provided by Francis Atkinson
        (EBI), accessible from:

            https://github.com/flatkinson/standardiser

        The output is written next to the input, with '_std' appended to the
        file name (before the extension).

        Returns (success_list, output file name) on success, where
        success_list holds one boolean per molecule and is set to False for
        molecules discarded because the standardiser failed with an
        unexpected error.  Returns (False, error message) if the mol
        supplier cannot be created.
        '''

        success_list = [True] * sdfu.count_mols(ifile)

        if not method:
            method = ''

        LOG.info('Starting normalization...')
        try:
            suppl = Chem.SDMolSupplier(ifile)
            LOG.debug(f'mol supplier created from {ifile}')
        except Exception as e:
            LOG.error('Unable to create mol supplier with the exception: '
                      f'{e}')
            return False, 'Error at processing input file for standardizing structures'

        filename, fileext = os.path.splitext(ifile)
        ofile = filename + '_std' + fileext
        LOG.debug(f'writing standarized molecules to {ofile}')
        with open(ofile, 'w') as fo:
            # molecules written to the output so far
            mcount = 0
            # molecules RDKit was able to read so far; unlike mcount this
            # also advances for discarded molecules, so it stays aligned
            # with success_list after the first discard
            readable = 0
            for m in suppl:

                # molecule not recognised by RDKit
                if m is None:
                    LOG.error('Unable to process molecule'
                              f' #{mcount+1} in {ifile}')
                    continue

                slot = readable
                readable += 1

                name = sdfu.getName(m,
                                    count=mcount,
                                    field=self.parameters['SDFile_name'],
                                    suppl=suppl)

                if 'standardize' in method:
                    try:
                        parent = standardise.run(Chem.MolToMolBlock(m))

                    except standardise.StandardiseException as e:
                        # no parent was generated (run raised before
                        # returning), so the original mol is used below
                        if e.name == "no_non_salt":
                            # very common warning, use original mol and proceed
                            LOG.debug(
                                f'"No non salt error" found. Skiped standardize for mol'
                                f' #{mcount} {name}')
                        else:
                            LOG.error(
                                f'Critical standardize exception: {e}'
                                f' when processing mol #{mcount} {name}. Skipping normalization'
                            )
                        parent = Chem.MolToMolBlock(m)

                    except Exception as e:
                        # this error means an execution error running standardizer
                        # the molecule is discarded and therefore the list of molecules must be updated
                        LOG.error(
                            f'Critical standardize execution exception {e}'
                            f' when processing mol #{mcount} {name}. Discarding molecule'
                        )
                        success_list[slot] = False
                        continue

                else:
                    LOG.info('Skipping normalization.')
                    parent = Chem.MolToMolBlock(m)

                # in any case, write parent followed by the record terminator
                fo.write(parent)
                mcount += 1
                fo.write('$$$$\n')

        return success_list, ofile
コード例 #11
0
ファイル: sanify_utils.py プロジェクト: joeaaa/pipelines
def flatkinsonStandardizer(mol):
    """Delegate to ``standardise.run`` and return its result for ``mol``."""
    standardised = standardise.run(mol)
    return standardised
コード例 #12
0
    prism_zinc.extend(smile)

# Release the source lists before the standardisation pass.  Deleting a loop
# variable (``for item in [...]: del item``) only unbinds ``item`` and leaves
# both lists alive, so the names themselves are deleted here.
del smiles, zinc_smiles
gc.collect()

prism_zinc = shuffle(prism_zinc)

print("Before standardiser: {}".format(len(prism_zinc)))

standard_smiles = []
for smile in prism_zinc:
    parsed = Chem.MolFromSmiles(smile)
    if parsed is None:
        # RDKit could not parse this entry; skip it instead of crashing
        continue
    m = Chem.MolToSmiles(parsed, isomericSmiles=True, canonical=True)
    try:
        mol = standardise.run(m)
    except standardise.StandardiseException:
        # entries the standardiser rejects are dropped
        continue
    standard_smiles.append(mol)

print("After standardiser: {}".format(len(standard_smiles)))

del prism_zinc
gc.collect()

with open(
        '/hps/research1/icortes/acunha/data/ZINC_PRISM_SMILES/zinc_prism_smiles_processed.smi',
        "w") as f:
    f.write('\n'.join(standard_smiles))
コード例 #13
0
ファイル: insert_ligand.py プロジェクト: suchopaa/chembdb
        continue
    chembl_help.append(list(chembl[i]))
    i = i + 1

#pprint (chembl_help)

#Chembl standardize
# Replace the InChI in the first column of every ChEMBL row with the InChI of
# its standardised parent.  Each step is best effort: a failure is reported
# and the molecule from the previous step is carried forward.
for lig, entry in enumerate(chembl_help):
    mol = inchi.MolFromInchi(entry[0], sanitize=False)
    try:
        rdmolops.RemoveStereochemistry(mol)
    except Exception:
        print("Not able to remove stereochemistry. Chembl.")
    try:
        mol = standardise.run(mol)
    except standardise.StandardiseException as e:
        # logging.warn is a deprecated alias of logging.warning
        logging.warning(e.message)
    try:
        mol = s.standardize(mol)
    except Exception:
        print("Not able to standardize. Chembl.")
    try:
        mol = s.tautomer_parent(mol, skip_standardize=True)
    except Exception:
        print("Not able to make tautomer parent. Chembl.")
    # NOTE(review): unlike the steps above this one is not guarded — confirm
    # that a failure here is meant to abort the whole run.
    mol = s.stereo_parent(mol, skip_standardize=True)
    entry[0] = inchi.MolToInchi(mol)

#BDB preparing
bdb_help = []
コード例 #14
0
def normalize(inF, outF, singleF, failedF, remove_salts= True, keep_nonorganic= False, verbose=False, pH=7.4) :
    """Standardise a tab-separated ``CAS<TAB>SMILES`` listing.

    Parameters
    ----------
    inF : iterable of str
        Input lines, each holding a CAS identifier and a SMILES separated
        by a tab.
    outF, singleF, failedF : writable text files
        Receive, respectively, the standardised components with two or more
        heavy atoms, the components written as single atoms (fewer than two
        heavy atoms, or metal ions), and the entries that could not be
        processed together with the reason.
    keep_nonorganic : bool
        Forwarded to ``standardise.run``.
    remove_salts, verbose, pH
        Accepted for interface compatibility; not used by this function.

    A summary of the counters is printed at the end.  Returns ``None``.
    """
    count = 0        ## count for the whole dataset
    count_inc = 0    ## count for only included molecules
    count_exc = 0    ## count for only excluded molecules
    all_salts = 0    ## count for entries with only salts / solvent
    fail_sanity = 0  ## count for entries that fail sanity check
    fail_mol = 0     ## count for entries that fail to create mol object
    fail_prot = 0    ## count for entries that fail protonation (never incremented here)

    header = '%s\n' %('\t'.join(['CAS', 'Component', 'Original smiles', 'smiles']))
    fail_header = '%s\n' %('\t'.join(['CAS', 'Original smiles', 'Error']))

    outF.write(header)
    singleF.write(header)
    failedF.write(fail_header)

    for line in inF:
        count += 1
        try:
            cas, smi = line.rstrip().split('\t')
        except ValueError:
            # the line does not hold exactly two tab-separated fields
            print ('Failed parsing line:')
            print (line)
            failedF.write(line.rstrip()+'\tFailed parsing line\n')
            continue
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            count_exc += 1
            fail_mol += 1
            failedF.write(line.rstrip()+'\tFailed to create molecule object\n')
            continue

        try:
            succ, mol, err = standardise.run(mol, keep_nonorganic= keep_nonorganic)
        except Exception as exc:
            count_exc += 1
            fail_sanity += 1
            failedF.write('{}\t{}\t{}\n'.format(cas, smi, exc))
            continue

        i = 1    ## component index within this entry
        if succ:
            count_inc += 1
            target = singleF if mol.GetNumHeavyAtoms() < 2 else outF
            target.write('{}\t{}\t{}\t{}\n'.format(cas, i, smi, Chem.MolToSmiles(mol, isomericSmiles=True)))
        elif err == 'Multiple non-salt/solvate components':
            # here mol is a collection of components; one row per distinct SMILES
            smis = {Chem.MolToSmiles(moli, isomericSmiles=True) for moli in mol}
            for smii in smis:
                moli = Chem.MolFromSmiles(smii)
                target = singleF if moli.GetNumHeavyAtoms() < 2 else outF
                target.write('{}\t{}\t{}\t{}\n'.format(cas, i, smi, smii))
                i += 1
            count_inc += 1
        elif err == 'No non-salt/solvate components':
            # only metal ions are kept from an all-salt / solvent entry
            smis = {Chem.MolToSmiles(moli, isomericSmiles=True) for moli in mol}
            metal = False
            for smii in smis:
                moli = Chem.MolFromSmiles(smii)
                nHA = moli.GetNumHeavyAtoms()
                if nHA == 1 and moli.GetAtomWithIdx(0).GetSymbol() in _metals:
                    singleF.write('{}\t{}\t{}\t{}\n'.format(cas, i, smi, smii))
                    metal = True
                    i += 1
            if metal:
                count_inc += 1
            else:
                count_exc += 1
                all_salts += 1
                failedF.write('{}\t{}\t{}\n'.format(cas, smi, err))
        else:
            # Unrecognised failure: record it instead of dropping the entry
            # silently.
            count_exc += 1
            failedF.write('{}\t{}\t{}\n'.format(cas, smi, err))

    # NOTE(review): removes files this function never creates; presumably
    # leftovers of a helper step — confirm it is still needed.
    os.system('rm in.sdf out.sdf')
    print ('the full dataset = {}'.format(count))
    print ('Molecules normalized = {}'.format(count_inc))
    print ('Molecules excluded = {}'.format(count_exc))
    print ('   Fail RDkit mol object = {}'.format(fail_mol))
    print ('   Fail protonation = {}'.format(fail_prot))
    print ('   Fail sanity check = {}'.format(fail_sanity))
    print ('   Only salts / solvent = {}'.format(all_salts))
コード例 #15
0
def main():
    """Standardise every compound of an SDF or tab-separated SMILES file.

    The input file is given on the command line.  A ``.sdf`` input is read
    as SDF and produces ``standardised.sdf`` and ``errors.sdf``; any other
    extension is read as tab-separated ``SMILES<TAB>name`` rows and produces
    ``standardised.smi`` and ``errors.smi``.  With ``-r`` the rules applied
    to each compound are written alongside it.
    """

    # Options, arguments and logging...

    argparser = argparse.ArgumentParser(description="Standardise compounds")

    argparser.add_argument("-V", "--verbose", action="store_true", help="enable verbose logger")
    argparser.add_argument("-r", "--output_rules_applied", action="store_true", help="enable output of rules applied")

    argparser.add_argument("infile", help="Input file (SDF or SMILES)")

    config = argparser.parse_args()

    logger = make_logger.run(__name__)

    # Initialisation...

    rule_names = ["{:02d} {}".format(x['n'], x['name']) for x in standardise.rules.rule_set]

    counts = Counter({x: 0 for x in list(errors.keys()) + ['read', 'standardised']})

    input_type = os.path.splitext(config.infile)[1] # sdf or smi

    logger.info("Input type = '{in_type}'".format(in_type=input_type))

    if input_type == ".sdf": # Read/write SDF...

        with open(config.infile) as inhandle, \
                open("standardised.sdf", "w") as outfile, \
                open("errors.sdf", "w") as errfile:

            for original in SDF.readFile(inhandle):

                counts["read"] += 1

                logger.info(">>> Starting mol '{name}'...".format(name=original.name))

                # None means "rules not requested"; a list is filled in by
                # standardise.run
                rules_applied = [] if config.output_rules_applied else None

                try:
                    if rules_applied is None:
                        parent = standardise.run(original.molblock)
                    else:
                        parent = standardise.run(original.molblock, output_rules_applied=rules_applied)

                except standardise.StandardiseException as err:

                    logger.warning(">>> {error} for '{name}'".format(error=errors[err.name], name=original.name))

                    counts[err.name] += 1

                    errfile.write("{mol}>  <n>\n{nread}\n\n>  <error>\n{error}\n\n$$$$\n".format(mol=original.molblock, nread=counts["read"], error=errors[err.name]))

                else:

                    logger.info("Mol '{name}' OK".format(name=original.name))

                    counts["standardised"] += 1

                    # put the original name back on the first line of the molblock
                    parent = re.sub(r'^\w*\n', original.name + '\n', parent)

                    if rules_applied is None:

                        outfile.write("{mol}>  <n>\n{nread}\n\n$$$$\n".format(mol=parent, nread=counts["read"]))

                    else:

                        rules = ';'.join(rule_names[x-1] for x in rules_applied)

                        # SDF data headers must start with '>', like the <n> one above
                        outfile.write("{mol}>  <n>\n{nread}\n\n>  <rules_applied>\n{rules}\n\n$$$$\n".format(mol=parent, nread=counts["read"], rules=rules))

                if counts["read"] % 100 == 0: logger.info("...done: {read} read, {standardised} OK...".format(**counts))

    else: # Read/write (tab-seperated) SMILES + name...

        with open(config.infile) as inhandle, \
                open("standardised.smi", "w") as outhandle, \
                open("errors.smi", "w") as errhandle:

            infile = csv.reader(inhandle, delimiter="\t")
            outfile = csv.writer(outhandle, delimiter="\t")
            errfile = csv.writer(errhandle, delimiter="\t")

            for original in infile:

                counts["read"] += 1

                smiles, name = original

                logger.info(">>> Starting mol '{name}'...".format(name=name))

                rules_applied = [] if config.output_rules_applied else None

                try:
                    if rules_applied is None:
                        parent = standardise.run(smiles)
                    else:
                        parent = standardise.run(smiles, output_rules_applied=rules_applied)

                except standardise.StandardiseException as err:

                    logger.warning(">>> {error} for mol '{name}'".format(error=errors[err.name], name=name))

                    counts[err.name] += 1

                    errfile.writerow(original + [err.name])

                else:

                    logger.info("Mol '{name}' OK".format(name=name))

                    counts["standardised"] += 1

                    if rules_applied is None:

                        outfile.writerow([parent, name])

                    else:

                        rules = ';'.join(rule_names[x-1] for x in rules_applied)

                        outfile.writerow([parent, name, smiles, rules])

                if counts["read"] % 100 == 0: logger.info("...done: {read} read, {standardised} OK...".format(**counts))

    logger.info("Finished: {read} read, {standardised} OK in total.".format(**counts))

    logger.info("Counts: " + json.dumps(counts, indent=4))
コード例 #16
0
def main():
    """Standardise every compound of an SDF or tab-separated SMILES file.

    ``-i`` names the input file and ``-o`` the output file.  A ``.sdf``
    input is read as SDF; any other extension is read as tab-separated
    ``SMILES<TAB>name`` rows.  Compounds that cannot be standardised are
    written to ``<input basename>_errors<input extension>``.  With ``-r``
    the rules applied to each compound are written alongside it.
    """

    # Options, arguments and logging...

    argparser = argparse.ArgumentParser(description="Standardise compounds")

    argparser.add_argument("-V",
                           "--verbose",
                           action="store_true",
                           help="enable verbose logger")
    argparser.add_argument("-r",
                           "--output_rules_applied",
                           action="store_true",
                           help="enable output of rules applied")

    # Both paths are needed below; without them the script used to fail
    # with a TypeError on None instead of a usage message.
    argparser.add_argument("-i",
                           dest="infile",
                           required=True,
                           help="Input file (SDF or SMILES)")
    argparser.add_argument("-o",
                           dest="outfile",
                           required=True,
                           help="Output file")

    config = argparser.parse_args()

    logger = make_logger.run(__name__)

    # Initialisation...

    rule_names = [
        "{:02d} {}".format(x['n'], x['name'])
        for x in standardise.rules.rule_set
    ]

    counts = Counter(
        {x: 0
         for x in list(errors.keys()) + ['read', 'standardised']})

    # input_type keeps its leading dot (".sdf", ".smi"), so it is appended
    # to the error file name without an extra "."
    outfile_basename, input_type = os.path.splitext(config.infile)
    errfile_name = outfile_basename + "_errors" + input_type

    logger.info("Input type = '{in_type}'".format(in_type=input_type))

    if input_type == ".sdf":  # Read/write SDF...

        with open(config.infile) as inhandle, \
                open(config.outfile, "w") as outfile, \
                open(errfile_name, "w") as errfile:

            for original in SDF.readFile(inhandle):

                counts["read"] += 1

                logger.info(
                    ">>> Starting mol '{name}'...".format(name=original.name))

                # None means "rules not requested"; a list is filled in by
                # standardise.run
                rules_applied = [] if config.output_rules_applied else None

                try:
                    if rules_applied is None:
                        parent = standardise.run(original.molblock)
                    else:
                        parent = standardise.run(
                            original.molblock,
                            output_rules_applied=rules_applied)

                except standardise.StandardiseException as err:

                    logger.warning(">>> {error} for '{name}'".format(
                        error=errors[err.name], name=original.name))

                    counts[err.name] += 1

                    errfile.write(
                        "{mol}>  <n>\n{nread}\n\n>  <error>\n{error}\n\n$$$$\n".
                        format(mol=original.molblock,
                               nread=counts["read"],
                               error=errors[err.name]))

                else:

                    logger.info("Mol '{name}' OK".format(name=original.name))

                    counts["standardised"] += 1

                    # put the original name back on the first line of the
                    # molblock
                    parent = re.sub(r'^\w*\n', original.name + '\n', parent)

                    if rules_applied is None:

                        outfile.write("{mol}>  <n>\n{nread}\n\n$$$$\n".format(
                            mol=parent, nread=counts["read"]))

                    else:

                        rules = ';'.join(rule_names[x - 1]
                                         for x in rules_applied)

                        # SDF data headers must start with '>', like the
                        # <n> one above
                        outfile.write(
                            "{mol}>  <n>\n{nread}\n\n>  <rules_applied>\n{rules}\n\n$$$$\n"
                            .format(mol=parent,
                                    nread=counts["read"],
                                    rules=rules))

                if counts["read"] % 100 == 0:
                    logger.info(
                        "...done: {read} read, {standardised} OK...".format(
                            **counts))

    else:  # Read/write (tab-seperated) SMILES + name...

        with open(config.infile) as inhandle, \
                open(config.outfile, "w") as outhandle, \
                open(errfile_name, "w") as errhandle:

            infile = csv.reader(inhandle, delimiter="\t")
            outfile = csv.writer(outhandle, delimiter="\t")
            errfile = csv.writer(errhandle, delimiter="\t")

            for original in infile:

                counts["read"] += 1

                smiles, name = original

                logger.info(">>> Starting mol '{name}'...".format(name=name))

                rules_applied = [] if config.output_rules_applied else None

                try:
                    if rules_applied is None:
                        parent = standardise.run(smiles)
                    else:
                        parent = standardise.run(
                            smiles, output_rules_applied=rules_applied)

                except standardise.StandardiseException as err:

                    logger.warning(">>> {error} for mol '{name}'".format(
                        error=errors[err.name], name=name))

                    counts[err.name] += 1

                    errfile.writerow(original + [err.name])

                else:

                    logger.info("Mol '{name}' OK".format(name=name))

                    counts["standardised"] += 1

                    if rules_applied is None:

                        outfile.writerow([parent, name])

                    else:

                        rules = ';'.join(rule_names[x - 1]
                                         for x in rules_applied)

                        outfile.writerow([parent, name, smiles, rules])

                if counts["read"] % 100 == 0:
                    logger.info(
                        "...done: {read} read, {standardised} OK...".format(
                            **counts))

    logger.info(
        "Finished: {read} read, {standardised} OK in total.".format(**counts))

    logger.info("Counts: " + json.dumps(counts, indent=4))