def _molidsmiles_it_ecfp(output_file, start=0, step=46, fcfp=True, logeach=5000):
    """Q&D variant of _molidsmiles_it to allow Parallel work.

    Closures cannot be pickled and iterators cannot be reused across workers,
    so this function takes only plain arguments and creates both the processor
    and the molecule iterator itself.

    Parameters:
      - output_file: forwarded to _ecfp_writer as output_file
      - start: the index of the first molecule to consider
      - step: stride between considered molecules (start, start + step, ...)
      - fcfp: forwarded to _ecfp_writer as fcfp
      - logeach: an info message is emitted every logeach processed molecules;
                 a value <= 0 disables this logging

    Returns None; all results are whatever the processor built by _ecfp_writer
    produces (presumably written to output_file - confirm against _ecfp_writer).
    """
    # The processor is created before the molecule iterator, as it always was.
    processor = _ecfp_writer(output_file=output_file, fcfp=fcfp)
    # Delegate the slicing / logging / end-of-stream signalling to the generic
    # driver instead of duplicating its loop here. logeach is passed explicitly
    # because the two functions have different defaults.
    _molidsmiles_it(start=start,
                    step=step,
                    mols=read_smiles_ultraiterator(),
                    processor=processor,
                    logeach=logeach)
def _molidsmiles_it(start=0, step=46, mols=None, processor=None, logeach=500):
    """Feeds every step-th (molid, smiles) pair, beginning at index start, to processor.

    This is useful for evenly splitting workloads between processors / machines:
    worker k out of n can be run with start=k and step=n.

    Parameters:
      - start: the index (in mols) of the first pair to consider
      - step: stride between considered pairs; pairs at indices
              start, start + step, start + 2 * step... are processed
      - mols: an iterator of (molid, smiles) pairs;
              if None, read_smiles_ultraiterator() is used
      - processor: a function that gets called as processor(molid, smiles) for each
                   considered pair; when the iterator is exhausted,
                   (_END_MOLID, None) is sent.
      - logeach: an info message is emitted every logeach processed pairs;
                 a value <= 0 disables this logging

    Returns None. Nothing is yielded, all the work happens via processor.

    Raises TypeError if processor is not callable.
    """
    # Fail fast: a missing processor would otherwise only blow up (also with a
    # TypeError, but an opaque one) after the molecules source has been opened.
    if not callable(processor):
        raise TypeError('processor must be a callable taking (molid, smiles), got %r' % (processor,))
    if mols is None:
        mols = read_smiles_ultraiterator()
    for molindex, (molid, smiles) in enumerate(islice(mols, start, None, step)):
        # molindex counts the pairs handled by this call, not positions in mols.
        if logeach > 0 and molindex > 0 and not molindex % logeach:
            info('Molecule %d' % molindex)
        processor(molid, smiles)
    # Tell the processor that there are no more molecules coming.
    processor(_END_MOLID, None)