Python runの例、standardiser.standardise.run Pythonの例

コード例 #1

0

ファイルを表示

ファイル: process_smiles.py プロジェクト: phi-grib/Data_curation

def std(mol, returnMetals=False):
    """Standardise ``mol`` and return its components keyed by SMILES.

    Every value of the returned dictionary is a 4-tuple
    ``(molecule, is_metal, passed, error_message)``.

    :param mol: molecule object to standardise.
    :param returnMetals: when True, metal ions (single-atom metals and the
        metals split off by ``disconnect``) are added to the result.
    :return: dict mapping SMILES strings to the 4-tuples described above.

    ``standardise.run`` is used here in its three-value form
    ``(passed, result, errmessage)``.
    """
    stdD = {}

    # Single-atom compounds: they may be bare metal ions.
    if mol.GetNumAtoms() == 1:
        at = mol.GetAtoms()[0].GetSymbol()
        symbol = '[%s]' % at
        if at in _metals and returnMetals:
            stdD[symbol] = (Chem.MolFromSmiles(symbol), True, True, '')
        else:
            (passed, std_cmpd, errmessage) = standardise.run(mol)
            # A single atom that fails standardisation is left out entirely.
            if passed:
                stdD[symbol] = (std_cmpd, False, passed, errmessage)
        return stdD

    # Extract metal ions from complex compounds.
    comp_mol, metals = disconnect(mol)
    if returnMetals:
        for metal in metals:
            metalmol = Chem.MolFromSmiles(metal)
            metal_key = '[%s]' % metalmol.GetAtoms()[0].GetSymbol()
            stdD[metal_key] = (Chem.MolFromSmiles(metal_key), True, True, '')

    # Standardise the rest of the molecule. The call happens exactly once and
    # inside the try block, so a standardiser crash is reported as 'Failed'
    # instead of propagating to the caller.
    try:
        (passed, std_cmpds, errmessage) = standardise.run(comp_mol)
    except Exception:
        passed = False
        std_cmpds = None
        errmessage = 'Failed'

    if passed:
        key = Chem.MolToSmiles(std_cmpds, isomericSmiles=True)
        stdD[key] = (std_cmpds, False, True, '')
    elif errmessage == 'Multiple non-salt/solvate components':
        # Several components came back: de-duplicate them by InChI (the last
        # molecule seen for a given InChI wins) and report each one.
        unique_by_inchi = {Chem.MolToInchi(cmpd): cmpd for cmpd in std_cmpds}
        for cmpd in unique_by_inchi.values():
            stdD[Chem.MolToSmiles(cmpd, isomericSmiles=True)] = (
                cmpd, False, True, 'Multiple non-salt/solvate components')
    else:
        # Any other failure: return the original molecule flagged as failed.
        key = Chem.MolToSmiles(mol, isomericSmiles=True)
        stdD[key] = (mol, False, False, errmessage)

    return stdD

コード例 #2

0

ファイルを表示

def process_data(type_dataset, dataset, i):
    """Standardise a collection of SMILES and featurise the ones that pass.

    :param type_dataset: label used as prefix of the generated ids and in the
        message printed for entries that could not be processed.
    :param dataset: iterable of SMILES strings.
    :param i: offset added to the running counter when building ids.
    :return: tuple ``(ids, fingerprints, smiles, one_hot, next_offset)`` where
        the first three items are built with ``np.bytes_``, ``one_hot`` is a
        numpy array and ``next_offset`` is ``i`` plus the number of accepted
        molecules.
    """
    new_ids = []
    fingerprints = []
    smiles = []
    mol_oh = []

    n_smiles = 0
    for smile in dataset:
        try:
            mol = standardise.run(smile)
            # Only standardised strings of at most 120 characters are kept.
            if len(mol) <= 120:
                fp = AllChem.GetMorganFingerprintAsBitVect(
                    Chem.MolFromSmiles(mol), 2, nBits=1024)
                m_oh = ohf.featurize([mol], 120)
                if str(m_oh) != 'nan':
                    new_ids.append('{}_{}'.format(type_dataset,
                                                  n_smiles + 1 + i))
                    fingerprints.append('[{}]'.format(','.join(
                        [str(x) for x in fp])))
                    smiles.append(mol)
                    mol_oh.append(m_oh[0])
                    n_smiles += 1
        except Exception:
            # Best-effort processing: report the offending entry and move on.
            # `Exception` (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit stop the run.
            print('{} in {}'.format(smile, type_dataset))

    print(len(smiles))

    # np.string_ was an alias of np.bytes_ and was removed in NumPy 2.0;
    # np.bytes_ is available in both old and new releases.
    return (np.bytes_(new_ids), np.bytes_(fingerprints), np.bytes_(smiles),
            np.array(mol_oh), n_smiles + i)

コード例 #3

0

ファイルを表示

def get_rdk_mol(smi, perform_standardisation=False):
    """Parse a SMILES string into a molecule, optionally standardising it.

    :param smi: SMILES string handed to ``Chem.MolFromSmiles``.
    :param perform_standardisation: when True, the parsed molecule is passed
        through ``standardise.run``.
    :return: the (possibly standardised) molecule.
    :raises Exception: if ``Chem.MolFromSmiles`` returns None for ``smi``.
    """
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        raise Exception('Could not parse SMILES: {!r}'.format(smi))
    if perform_standardisation:
        try:
            mol = standardise.run(mol)
        except standardise.StandardiseException:
            # Deliberate best-effort: when standardisation fails the
            # unstandardised molecule is returned instead.
            pass
    return mol

コード例 #4

0

ファイルを表示

def get_canonical_smile(x):
    """
    Run the standardiser on a smile.

    :param x: smile (string)
    :return: whatever ``standardise.run(x)`` returns, or the string 'None'
        (not the ``None`` object) when any exception is raised
    """
    try:
        standardised = standardise.run(x)
    except Exception:
        return 'None'
    return standardised

コード例 #5

0

ファイルを表示

def calc_descriptors(row,
                     fp_type,
                     fp_radius,
                     con_desc_list,
                     stdrise=True,
                     hashed=False):
    """Compute a Morgan fingerprint and selected descriptors for one row.

    :param row: mapping with a ``'smiles'`` entry.
    :param fp_type: ``'ecfp'`` computes the fingerprint with
        ``useFeatures=False``; any other value uses ``useFeatures=True``.
    :param fp_radius: radius passed to the Morgan fingerprint functions.
    :param con_desc_list: names of the descriptors to compute; recognised
        names are ``alogp``, ``mw``, ``n_h_atoms``, ``rtb``, ``hbd``, ``hba``.
    :param stdrise: when True, run the molecule through ``standardise.run``.
    :param hashed: when True, use a 2048-bit vector and report the on-bits.
    :return: ``(fps, alogp, mw, n_h_atoms, rtb, hbd, hba)``; descriptors not
        requested are None. A tuple of seven None values is returned when the
        SMILES cannot be parsed or standardisation fails.
    """
    failure = (None, None, None, None, None, None, None)

    rdmol = Chem.MolFromSmiles(row['smiles'])
    if not rdmol:
        return failure

    if stdrise:
        try:
            rdmol = standardise.run(rdmol, output_rules_applied=[])
        except Exception:
            # Any standardisation failure invalidates the whole row.
            return failure

    # The two fingerprint flavours differ only in the useFeatures flag.
    use_features = fp_type != 'ecfp'
    if hashed:
        bit_vect = AllChem.GetMorganFingerprintAsBitVect(
            rdmol, fp_radius, 2048, useFeatures=use_features)
        fps = {key: 1 for key in bit_vect.GetOnBits()}
    else:
        # NB works better with binary features, removing FP feature freq
        # (useCounts=False)
        fps = AllChem.GetMorganFingerprint(
            rdmol, fp_radius, useFeatures=use_features,
            useCounts=False).GetNonzeroElements()

    def descriptor(name, calculator):
        # Compute a descriptor only when the caller asked for it.
        return calculator(rdmol) if name in con_desc_list else None

    alogp = descriptor('alogp', Descriptors.MolLogP)
    mw = descriptor('mw', Descriptors.MolWt)
    n_h_atoms = descriptor('n_h_atoms', Descriptors.HeavyAtomCount)
    rtb = descriptor('rtb', Descriptors.NumRotatableBonds)
    hbd = descriptor('hbd', Descriptors.NumHDonors)
    hba = descriptor('hba', Descriptors.NumHAcceptors)
    return fps, alogp, mw, n_h_atoms, rtb, hbd, hba

コード例 #6

0

ファイルを表示

ファイル: moleculeHelper.py プロジェクト: bet-gregori/phitools

def standardize(mol):
    """
    Wrapper to apply the structure normalization protocol provided by Francis Atkinson (EBI).

    The molecule is converted to a MolBlock and handed to ``standardise.run``.
    When the standardiser reports ``no_non_salt``, the MolBlock of the
    original molecule is returned instead.

    Returns a tuple containing:
        1) True/False: depending on the result of the method
        2) (if True ) The output molecule
           (if False) The name of the standardiser error
    """
    try:
        return (True, standardise.run(Chem.MolToMolBlock(mol)))
    except standardise.StandardiseException as exc:
        if exc.name != "no_non_salt":
            return (False, exc.name)
        # Nothing but salts: fall back to the unmodified input.
        return (True, Chem.MolToMolBlock(mol))

コード例 #7

0

ファイルを表示

ファイル: PRF_evaluation.py プロジェクト: BenderGroup/PRF

def preprocessMolecule(inp):
    """Standardise ``inp`` and apply element and molecular-weight filters.

    Returns False for a falsy input, None when standardisation raises
    ``StandardiseException`` or the standardised molecule fails a filter,
    and the standardised molecule otherwise.
    """
    if not inp:
        return False

    def checkC(mm):
        # True when the molecule has a carbon atom and its molecular weight
        # lies within [100, 1000].
        mwt = Descriptors.MolWt(mm)
        weight_ok = 100 <= mwt <= 1000
        return any(
            atom.GetAtomicNum() == 6 and weight_ok for atom in mm.GetAtoms())

    def checkHm(mm):
        # False as soon as one atom has a rejected atomic number.
        def rejected(z):
            return (z in [2, 10, 13, 18] or 21 <= z <= 32 or 36 <= z <= 52
                    or z >= 54)

        return not any(rejected(atom.GetAtomicNum()) for atom in mm.GetAtoms())

    try:
        std_mol = standardise.run(inp)
    except standardise.StandardiseException:
        return None

    if not std_mol or not checkHm(std_mol) or not checkC(std_mol):
        return None
    return std_mol

コード例 #8

0

ファイルを表示

def mol_to_standardised_mol(mol, name=None):
    """Standardise mol(s).

    :param mol: molecule to standardise.
    :param name: label used in log messages; defaults to the molecule's
        ``_Name`` property, or ``repr(mol)`` when that property is missing.
    :return: the standardised molecule, of the same type (``PropertyMol`` or
        ``rdkit.Chem.Mol``) as the input. The input itself is returned when
        the ``standardiser`` package cannot be imported, and an
        unstandardised copy is returned when standardisation fails.
    """
    try:
        from standardiser import standardise
        from standardiser.utils import StandardiseException
    except ImportError:
        logging.warning(
            "standardiser module unavailable. Using unstandardised mol.")
        return mol

    if name is None:
        try:
            name = mol.GetProp("_Name")
        except KeyError:
            name = repr(mol)

    if isinstance(mol, PropertyMol):
        mol_type = PropertyMol
        mol = rdkit.Chem.Mol(mol)
    else:
        mol_type = rdkit.Chem.Mol

    # Backwards compatibility with old standardiser releases that expose
    # `apply` instead of `run`. The entry point is resolved up front so that
    # (a) the fallback is protected by the StandardiseException handler below
    # and (b) an AttributeError raised *inside* run is not mistaken for a
    # missing attribute.
    try:
        run_standardiser = standardise.run
    except AttributeError:
        run_standardiser = standardise.apply

    logging.debug("Standardising {}".format(name))
    try:
        std_mol = run_standardiser(mol)
    except StandardiseException:
        logging.error(
            ("Standardisation of {} failed. Using unstandardised "
             "mol.".format(name)),
            exc_info=True,
        )
        return mol_type(mol)

    std_mol = mol_type(std_mol)
    try:
        # Carry the original name over to the standardised molecule.
        std_mol.SetProp("_Name", mol.GetProp("_Name"))
    except KeyError:
        pass

    return std_mol

コード例 #9

0

ファイルを表示

            mols_now.append(m)
            ys.append(y_now)
            mol_ids.append(chembl_ids[i])
    activities = ys
    chembl_ids = visited

    from standardiser import standardise
    import logging
    incorrect_mols = []  # to remove those that cannot be standardised
    mols = []
    #standardise.logger.setLevel('DEBUG')
    for i, m in enumerate(mols_now):
        print "Standardizing molecule: ", i
        parent = None
        try:
            parent = standardise.run(m)
            mols.append(parent)
        except standardise.StandardiseException as e:
            logging.warning(e.message)
            incorrect_mols.append(i)

    activities = [
        x for i, x in enumerate(activities) if i not in incorrect_mols
    ]
    chembl_ids = [
        x for i, x in enumerate(chembl_ids) if i not in incorrect_mols
    ]

    #--------------------------------------------------------
    # writing in .sdf format:
    #--------------------------------------------------------

コード例 #10

0

ファイルを表示

ファイル: idata.py プロジェクト: josecarlosgomezt/flame

    def normalize(self, ifile, method):
        '''
        Generates a simplified SDFile with MolBlock and an internal ID for
        further processing

        Note that this method is applied to every molecule and that it removes
        mol blocks in the input SDFile not able to generate a valid mol

        Also, when defined in control, applies chemical standardization
        protocols, like the one provided by Francis Atkinson (EBI),
        accessible from:

            https://github.com/flatkinson/standardiser

        The output is written next to the input, with ``_std`` inserted
        before the file extension.

        Returns ``(success_list, ofile)`` on completion, where success_list
        holds one boolean per molecule counted in ``ifile`` and ofile is the
        path of the file written. Returns ``(False, error_message)`` when the
        mol supplier cannot be created.
        '''

        success_list = [True] * sdfu.count_mols(ifile)

        if not method:
            method = ''

        LOG.info('Starting normalization...')
        try:
            suppl = Chem.SDMolSupplier(ifile)
            LOG.debug(f'mol supplier created from {ifile}')
        except Exception as e:
            LOG.error('Unable to create mol supplier with the exception: '
                      f'{e}')
            return False, 'Error at processing input file for standardizing structures'

        filename, fileext = os.path.splitext(ifile)
        ofile = filename + '_std' + fileext
        LOG.debug(f'writing standarized molecules to {ofile}')
        with open(ofile, 'w') as fo:
            # mcount counts the molecules written so far.
            # NOTE(review): it is not advanced for molecules that are skipped
            # or discarded below, so success_list[mcount] may not line up with
            # the molecule's position in ifile after the first such molecule
            # -- confirm what the caller expects before changing this.
            mcount = 0
            for m in suppl:

                # molecule not recognised by RDKit
                if m is None:
                    LOG.error('Unable to process molecule'
                              f' #{mcount+1} in {ifile}')
                    continue

                name = sdfu.getName(m,
                                    count=mcount,
                                    field=self.parameters['SDFile_name'],
                                    suppl=suppl)

                if 'standardize' in method:
                    try:
                        parent = standardise.run(Chem.MolToMolBlock(m))

                    except standardise.StandardiseException as e:
                        # When run() raises, no parent was produced, so the
                        # original mol block is always the fallback; only the
                        # log level depends on the kind of error.
                        if e.name == "no_non_salt":
                            # very common warning, use parent mol and proceed
                            LOG.debug(
                                f'"No non salt error" found. Skiped standardize for mol'
                                f' #{mcount} {name}')
                        else:
                            # serious issue, use original mol
                            LOG.error(
                                f'Critical standardize exception: {e}'
                                f' when processing mol #{mcount} {name}. Skipping normalization'
                            )
                        parent = Chem.MolToMolBlock(m)

                    except Exception as e:
                        # this error means an execution error running standardizer
                        # the molecule is discarded and therefore the list of molecules must be updated
                        LOG.error(
                            f'Critical standardize execution exception {e}'
                            f' when processing mol #{mcount} {name}. Discarding molecule'
                        )
                        success_list[mcount] = False
                        continue

                else:
                    LOG.info('Skipping normalization.')
                    parent = Chem.MolToMolBlock(m)

                # in any case, write the parent mol block followed by the
                # SDFile record terminator
                fo.write(parent)
                mcount += 1
                fo.write('$$$$\n')

        return success_list, ofile

コード例 #11

0

ファイルを表示

ファイル: sanify_utils.py プロジェクト: joeaaa/pipelines

def flatkinsonStandardizer(mol):
    """Return the result of ``standardise.run`` for ``mol``.

    Exceptions raised by the standardiser propagate to the caller.
    """
    standardised = standardise.run(mol)
    return standardised

コード例 #12

0

ファイルを表示

    prism_zinc.extend(smile)

# Release the source lists now that they have been merged into prism_zinc.
# (Deleting a loop variable only unbinds that variable; the lists themselves
# have to be deleted by name.)
del smiles, zinc_smiles
gc.collect()

prism_zinc = shuffle(prism_zinc)

print("Before standardiser: {}".format(len(prism_zinc)))

standard_smiles = []
for smile in prism_zinc:
    parsed = Chem.MolFromSmiles(smile)
    if parsed is None:
        # Unparsable SMILES: skip it rather than abort the whole run.
        continue
    try:
        m = Chem.MolToSmiles(parsed, isomericSmiles=True, canonical=True)
        mol = standardise.run(m)
    except standardise.StandardiseException:
        # Entries the standardiser rejects are dropped from the output.
        continue
    standard_smiles.append(mol)

print("After standardiser: {}".format(len(standard_smiles)))

del prism_zinc
gc.collect()

with open(
        '/hps/research1/icortes/acunha/data/ZINC_PRISM_SMILES/zinc_prism_smiles_processed.smi',
        "w") as f:
    f.write('\n'.join(standard_smiles))

コード例 #13

0

ファイルを表示

ファイル: insert_ligand.py プロジェクト: suchopaa/chembdb

        continue
    chembl_help.append(list(chembl[i]))
    i = i + 1

#pprint (chembl_help)

#Chembl standardize
# Replace the InChI stored in position 0 of every ChEMBL entry with the InChI
# of its standardised parent. Each step is best-effort: a failing step is
# reported and the molecule from the previous step is carried forward.
for entry in chembl_help:
    mol = inchi.MolFromInchi(entry[0], sanitize=False)
    try:
        rdmolops.RemoveStereochemistry(mol)
    except Exception:
        print("Not able to remove stereochemistry. Chembl.")
    try:
        mol = standardise.run(mol)
    except standardise.StandardiseException as e:
        # logging.warn was a deprecated alias and is gone in Python 3.13.
        # NOTE(review): `e.message` assumes StandardiseException defines that
        # attribute -- confirm against the installed standardiser.
        logging.warning(e.message)
    try:
        mol = s.standardize(mol)
    except Exception:
        print("Not able to standardize. Chembl.")
    try:
        mol = s.tautomer_parent(mol, skip_standardize=True)
    except Exception:
        print("Not able to make tautomer parent. Chembl.")
    mol = s.stereo_parent(mol, skip_standardize=True)
    entry[0] = inchi.MolToInchi(mol)

#BDB preparing
bdb_help = []

コード例 #14

0

ファイルを表示

def normalize(inF, outF, singleF, failedF, remove_salts= True, keep_nonorganic= False, verbose=False, pH=7.4) :
    """Standardise a tab-separated ``CAS<TAB>SMILES`` stream.

    :param inF: iterable of input lines, each ``CAS\\tSMILES``.
    :param outF: writable receiving standardised components with two or more
        heavy atoms.
    :param singleF: writable receiving components with fewer than two heavy
        atoms (including lone metal atoms).
    :param failedF: writable receiving the entries that could not be
        processed, with the reason.
    :param remove_salts: accepted for compatibility; not used here.
    :param keep_nonorganic: forwarded to ``standardise.run``.
    :param verbose: accepted for compatibility; not used here.
    :param pH: accepted for compatibility; protonation is disabled.
    :return: None. A summary of the counters is printed at the end.

    ``standardise.run`` is used here in its three-value form
    ``(success, result, error_message)``.
    """
    count = 0        ## count for the whole dataset
    count_inc = 0    ## count for only included molecules
    count_exc = 0    ## count for only excluded molecules
    all_salts = 0    ## count for entries with only salts / solvent
    fail_sanity = 0  ## count for entries that fail sanity check
    fail_mol = 0     ## count for entries that fail to create mol object
    fail_prot = 0    ## count for entries that fail protonation (disabled: stays 0)

    header = '%s\n' %('\t'.join(['CAS', 'Component', 'Original smiles', 'smiles']))
    fail_header = '%s\n' %('\t'.join(['CAS', 'Original smiles', 'Error']))
    record = '{}\t{}\t{}\t{}\n'   # CAS, component number, original, result
    failure = '{}\t{}\t{}\n'      # CAS, original, error

    outF.write(header)
    singleF.write(header)
    failedF.write(fail_header)

    def unique_smiles(mols):
        # Distinct isomeric SMILES of the components returned on failure.
        return {Chem.MolToSmiles(moli, isomericSmiles=True) for moli in mols}

    for line in inF:
        count += 1
        try:
            cas, smi = line.rstrip().split('\t')
        except ValueError:
            # The line does not hold exactly two tab-separated fields.
            print ('Failed parsing line:')
            print (line)
            failedF.write(line.rstrip()+'\tFailed parsing line\n')
            continue

        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            count_exc += 1
            fail_mol += 1
            failedF.write(line.rstrip()+'\tFailed to create molecule object\n')
            continue

        try:
            succ, mol, err = standardise.run(mol, keep_nonorganic= keep_nonorganic)
        except Exception as exc:
            count_exc += 1
            fail_sanity += 1
            failedF.write(failure.format(cas, smi, exc))
            continue

        i = 1  # component number within this entry
        if succ:
            count_inc += 1
            target = singleF if mol.GetNumHeavyAtoms() < 2 else outF
            target.write(record.format(cas, i, smi, Chem.MolToSmiles(mol, isomericSmiles=True)))
        elif err == 'Multiple non-salt/solvate components':
            # Every distinct component is reported on its own line.
            for smii in unique_smiles(mol):
                moli = Chem.MolFromSmiles(smii)
                target = singleF if moli.GetNumHeavyAtoms() < 2 else outF
                target.write(record.format(cas, i, smi, smii))
                i += 1
            count_inc += 1
        elif err == 'No non-salt/solvate components':
            # Only salts/solvents: keep the entry if it holds a lone metal.
            metal = False
            for smii in unique_smiles(mol):
                moli = Chem.MolFromSmiles(smii)
                if moli.GetNumHeavyAtoms() == 1 and moli.GetAtomWithIdx(0).GetSymbol() in _metals:
                    singleF.write(record.format(cas, i, smi, smii))
                    metal = True
                    i += 1
            if metal:
                count_inc += 1
            else:
                count_exc += 1
                all_salts += 1
                failedF.write(failure.format(cas, smi, err))
        else:
            # Any other standardiser error used to vanish without a trace;
            # record it so that every input line is accounted for.
            count_exc += 1
            failedF.write(failure.format(cas, smi, err))

    # Remove leftover scratch files without going through a shell; files
    # that are missing or cannot be removed are ignored.
    for leftover in ('in.sdf', 'out.sdf'):
        try:
            os.remove(leftover)
        except OSError:
            pass

    print ('the full dataset = {}'.format(count))
    print ('Molecules normalized = {}'.format(count_inc))
    print ('Molecules excluded = {}'.format(count_exc))
    print ('   Fail RDkit mol object = {}'.format(fail_mol))
    print ('   Fail protonation = {}'.format(fail_prot))
    print ('   Fail sanity check = {}'.format(fail_sanity))
    print ('   Only salts / solvent = {}'.format(all_salts))

コード例 #15

0

ファイルを表示

ファイル: standardise_mols.py プロジェクト: DmitriR/standardiser

def main():
    """Standardise every compound of the input file given on the command line.

    An ``.sdf`` input is read as an SD file and produces ``standardised.sdf``
    and ``errors.sdf``; any other extension is read as tab-separated
    ``SMILES<TAB>name`` and produces ``standardised.smi`` and ``errors.smi``.
    With ``-r`` the rules applied to each compound are written as well.
    Progress is logged every 100 compounds and the counters are logged at the
    end.
    """

    ######

    # Options, arguments and logging...

    argparser = argparse.ArgumentParser(description="Standardise compounds")

    # NOTE(review): --verbose is parsed but never consulted in this function.
    argparser.add_argument("-V", "--verbose", action="store_true", help="enable verbose logger")
    argparser.add_argument("-r", "--output_rules_applied", action="store_true", help="enable output of rules applied")

    argparser.add_argument("infile", help="Input file (SDF or SMILES)")

    config = argparser.parse_args()

    logger = make_logger.run(__name__)

    ######

    # Initialisation...

    rule_names = ["{:02d} {}".format(x['n'], x['name']) for x in standardise.rules.rule_set]

    counts = Counter({x: 0 for x in list(errors.keys()) + ['read', 'standardised']})

    input_type = os.path.splitext(config.infile)[1] # sdf or smi

    def run_standardiser(structure):
        # Return (parent, rules); rules is the ';'-joined list of rule names
        # applied, or None when -r was not requested.
        if not config.output_rules_applied:
            return standardise.run(structure), None
        rules_applied = []
        parent = standardise.run(structure, output_rules_applied=rules_applied)
        return parent, ';'.join(rule_names[x - 1] for x in rules_applied)

    def log_progress():
        if counts["read"] % 100 == 0:
            logger.info("...done: {read} read, {standardised} OK...".format(**counts))

    ######

    logger.info("Input type = '{in_type}'".format(in_type=input_type))

    if input_type == ".sdf": # Read/write SDF...

        # Context managers make sure all three files are flushed and closed.
        with open(config.infile) as source, \
                open("standardised.sdf", "w") as outfile, \
                open("errors.sdf", "w") as errfile:

            for original in SDF.readFile(source):

                counts["read"] += 1

                logger.info(">>> Starting mol '{name}'...".format(name=original.name))

                try:
                    parent, rules = run_standardiser(original.molblock)

                except standardise.StandardiseException as err:

                    logger.warning(">>> {error} for '{name}'".format(error=errors[err.name], name=original.name))

                    counts[err.name] += 1

                    errfile.write("{mol}>  <n>\n{nread}\n\n>  <error>\n{error}\n\n$$$$\n".format(mol=original.molblock, nread=counts["read"], error=errors[err.name]))

                else:

                    logger.info("Mol '{name}' OK".format(name=original.name))

                    counts["standardised"] += 1

                    # Put the original name back on the first line of the block.
                    parent = re.sub(r'^\w*\n', original.name + '\n', parent)

                    if config.output_rules_applied:
                        # SD data headers must start with '>'; the
                        # rules_applied header used to lack it.
                        outfile.write("{mol}>  <n>\n{nread}\n\n>  <rules_applied>\n{rules}\n\n$$$$\n".format(mol=parent, nread=counts["read"], rules=rules))
                    else:
                        outfile.write("{mol}>  <n>\n{nread}\n\n$$$$\n".format(mol=parent, nread=counts["read"]))

                log_progress()

    else: # Read/write (tab-seperated) SMILES + name...

        # newline='' is what the csv module asks for on files it reads/writes.
        with open(config.infile, newline='') as source, \
                open("standardised.smi", "w", newline='') as out_handle, \
                open("errors.smi", "w", newline='') as err_handle:

            infile = csv.reader(source, delimiter="\t")
            outfile = csv.writer(out_handle, delimiter="\t")
            errfile = csv.writer(err_handle, delimiter="\t")

            for original in infile:

                counts["read"] += 1

                smiles, name = original

                logger.info(">>> Starting mol '{name}'...".format(name=name))

                try:
                    parent, rules = run_standardiser(smiles)

                except standardise.StandardiseException as err:

                    logger.warning(">>> {error} for mol '{name}'".format(error=errors[err.name], name=name))

                    counts[err.name] += 1

                    errfile.writerow(original + [err.name])

                else:

                    logger.info("Mol '{name}' OK".format(name=name))

                    counts["standardised"] += 1

                    if config.output_rules_applied:
                        outfile.writerow([parent, name, smiles, rules])
                    else:
                        outfile.writerow([parent, name])

                log_progress()

    logger.info("Finished: {read} read, {standardised} OK in total.".format(**counts))

    logger.info("Counts: " + json.dumps(counts, indent=4))

コード例 #16

0

ファイルを表示

def main():
    """Standardise every compound of the file given with ``-i``.

    An ``.sdf`` input is read as an SD file; any other extension is read as
    tab-separated ``SMILES<TAB>name``. Standardised compounds go to the file
    given with ``-o``; compounds that fail go to ``<input basename>_errors``
    plus the input extension. With ``-r`` the rules applied to each compound
    are written as well. Progress is logged every 100 compounds and the
    counters are logged at the end.
    """

    ######

    # Options, arguments and logging...

    argparser = argparse.ArgumentParser(description="Standardise compounds")

    # NOTE(review): --verbose is parsed but never consulted in this function.
    argparser.add_argument("-V",
                           "--verbose",
                           action="store_true",
                           help="enable verbose logger")
    argparser.add_argument("-r",
                           "--output_rules_applied",
                           action="store_true",
                           help="enable output of rules applied")

    # Both paths are needed below; requiring them yields a usage message
    # instead of a TypeError on a missing value.
    argparser.add_argument("-i",
                           dest="infile",
                           required=True,
                           help="Input file (SDF or SMILES)")
    argparser.add_argument("-o",
                           dest="outfile",
                           required=True,
                           help="Output file")

    config = argparser.parse_args()

    logger = make_logger.run(__name__)

    ######

    # Initialisation...

    rule_names = [
        "{:02d} {}".format(x['n'], x['name'])
        for x in standardise.rules.rule_set
    ]

    counts = Counter(
        {x: 0
         for x in list(errors.keys()) + ['read', 'standardised']})

    outfile_basename, input_type = os.path.splitext(config.infile)
    # splitext keeps the leading dot in the extension, so no extra '.' here
    # (the old name came out as e.g. "x_errors..sdf").
    errfile_name = outfile_basename + "_errors" + input_type

    def run_standardiser(structure):
        # Return (parent, rules); rules is the ';'-joined list of rule names
        # applied, or None when -r was not requested.
        if not config.output_rules_applied:
            return standardise.run(structure), None
        rules_applied = []
        parent = standardise.run(structure,
                                 output_rules_applied=rules_applied)
        return parent, ';'.join(rule_names[x - 1] for x in rules_applied)

    def log_progress():
        if counts["read"] % 100 == 0:
            logger.info(
                "...done: {read} read, {standardised} OK...".format(**counts))

    ######

    logger.info("Input type = '{in_type}'".format(in_type=input_type))

    if input_type == ".sdf":  # Read/write SDF...

        # Context managers make sure all three files are flushed and closed.
        with open(config.infile) as source, \
                open(config.outfile, "w") as outfile, \
                open(errfile_name, "w") as errfile:

            for original in SDF.readFile(source):

                counts["read"] += 1

                logger.info(
                    ">>> Starting mol '{name}'...".format(name=original.name))

                try:
                    parent, rules = run_standardiser(original.molblock)

                except standardise.StandardiseException as err:

                    logger.warning(">>> {error} for '{name}'".format(
                        error=errors[err.name], name=original.name))

                    counts[err.name] += 1

                    errfile.write(
                        "{mol}>  <n>\n{nread}\n\n>  <error>\n{error}\n\n$$$$\n".
                        format(mol=original.molblock,
                               nread=counts["read"],
                               error=errors[err.name]))

                else:

                    logger.info("Mol '{name}' OK".format(name=original.name))

                    counts["standardised"] += 1

                    # Put the original name back on the first line of the
                    # block.
                    parent = re.sub(r'^\w*\n', original.name + '\n', parent)

                    if config.output_rules_applied:
                        # SD data headers must start with '>'; the
                        # rules_applied header used to lack it.
                        outfile.write(
                            "{mol}>  <n>\n{nread}\n\n>  <rules_applied>\n{rules}\n\n$$$$\n"
                            .format(mol=parent,
                                    nread=counts["read"],
                                    rules=rules))
                    else:
                        outfile.write("{mol}>  <n>\n{nread}\n\n$$$$\n".format(
                            mol=parent, nread=counts["read"]))

                log_progress()

    else:  # Read/write (tab-seperated) SMILES + name...

        # newline='' is what the csv module asks for on files it reads/writes.
        with open(config.infile, newline='') as source, \
                open(config.outfile, "w", newline='') as out_handle, \
                open(errfile_name, "w", newline='') as err_handle:

            infile = csv.reader(source, delimiter="\t")
            outfile = csv.writer(out_handle, delimiter="\t")
            errfile = csv.writer(err_handle, delimiter="\t")

            for original in infile:

                counts["read"] += 1

                smiles, name = original

                logger.info(">>> Starting mol '{name}'...".format(name=name))

                try:
                    parent, rules = run_standardiser(smiles)

                except standardise.StandardiseException as err:

                    logger.warning(">>> {error} for mol '{name}'".format(
                        error=errors[err.name], name=name))

                    counts[err.name] += 1

                    errfile.writerow(original + [err.name])

                else:

                    logger.info("Mol '{name}' OK".format(name=name))

                    counts["standardised"] += 1

                    if config.output_rules_applied:
                        outfile.writerow([parent, name, smiles, rules])
                    else:
                        outfile.writerow([parent, name])

                log_progress()

    logger.info(
        "Finished: {read} read, {standardised} OK in total.".format(**counts))

    logger.info("Counts: " + json.dumps(counts, indent=4))