Exemple #1
0
def _molidsmiles_it_ecfp(output_file, start=0, step=46, fcfp=True, logeach=5000):
    """Send every step-th (molid, smiles) pair, from index start on, to an ECFP writer.

    Quick-and-dirty, self-contained variant meant for Parallel work: it builds
    its own writer and its own molecule iterator, because closures cannot be
    pickled and iterators cannot be reused.

    Parameters:
      - output_file: forwarded to _ecfp_writer (presumably where the
                     fingerprints get written; confirm against _ecfp_writer)
      - start: the index of the first pair to consider
      - step: stride between considered pairs (step - 1 pairs are skipped
              between two consecutive ones)
      - fcfp: forwarded to _ecfp_writer
      - logeach: log a progress message every logeach handled pairs;
                 a value <= 0 disables logging
    """
    write_fingerprint = _ecfp_writer(output_file=output_file, fcfp=fcfp)
    selected = islice(read_smiles_ultraiterator(), start, None, step)
    for position, pair in enumerate(selected):
        molid, smiles = pair
        # position counts the pairs handled here, not the index in the full stream
        should_log = logeach > 0 and position > 0 and position % logeach == 0
        if should_log:
            info('Molecule %d' % position)
        write_fingerprint(molid, smiles)
    # Tell the writer that the stream is exhausted
    write_fingerprint(_END_MOLID, None)
Exemple #2
0
def _molidsmiles_it(start=0, step=46, mols=None, processor=None, logeach=500):
    """Calls processor(molid, smiles) for every step-th molecule, beginning at index start.

    This is useful for evenly splitting workloads between processors / machines:
    worker i out of n can be given start=i and step=n.

    Nothing is returned; all results flow through the processor callback.

    Parameters:
      - start: the index of the first pair to consider
      - step: stride between considered pairs, i.e. (step - 1) molecules are
              skipped between two consecutive calls to processor
      - mols: an iterator of (molid, smiles) pairs;
              if None, read_smiles_ultraiterator() is used
      - processor: a function that gets called as processor(molid, smiles) for
                   each selected pair; when the iterator is exhausted,
                   (_END_MOLID, None) is sent. Required despite the None default.
      - logeach: log a progress message every logeach processed molecules
                 (the count is over the selected molecules, not over mols);
                 a value <= 0 disables logging

    Raises:
      TypeError: if processor is not callable.
    """
    # Fail fast with a clear message instead of an opaque
    # "'NoneType' object is not callable" after the molecule source
    # has already been opened and partially consumed.
    if not callable(processor):
        raise TypeError('processor must be a callable taking (molid, smiles), got %r' % (processor,))
    if mols is None:
        mols = read_smiles_ultraiterator()
    for molindex, (molid, smiles) in enumerate(islice(mols, start, None, step)):
        if logeach > 0 and molindex > 0 and not molindex % logeach:
            info('Molecule %d' % molindex)
        processor(molid, smiles)
    # Signal the end of the stream so the processor can flush / clean up
    processor(_END_MOLID, None)