def main(
    molecules_file,
    log=None,
    overwrite=False,
    parallel_mode=None,
    num_proc=None,
    merge_confs=False,
    save_freq=SAVE_FREQ,
    compress=False,
):
    """Dispatch `run_batch` over work items derived from `molecules_file`.

    Logging is configured from `log`, a `Parallelizer` is built from
    `parallel_mode` and `num_proc`, and `run_batch` is handed to
    `Parallelizer.run` together with the remaining options as keyword
    arguments.

    Only the master process produces work items; every other process
    supplies an empty iterator. Each item is the tuple
    ``(molecules_file, i, para.num_proc - 1)`` for ``i`` in
    ``range(para.num_proc - 1)``.
    """
    setup_logging(log)
    para = Parallelizer(parallel_mode=parallel_mode, num_proc=num_proc)

    def _work_items():
        # NOTE(review): presumably (file, batch index, batch count) --
        # confirm against the signature of `run_batch`.
        for batch_index in range(para.num_proc - 1):
            yield (molecules_file, batch_index, para.num_proc - 1)

    data_iter = _work_items() if para.is_master() else iter([])

    batch_options = dict(
        overwrite=overwrite,
        merge_confs=merge_confs,
        save_freq=save_freq,
        compress=compress,
    )
    para.run(run_batch, data_iter, kwargs=batch_options)
def _exit_with_usage_error(para, message):
    """Log `message` (master process only) and exit with a failure status.

    Every process exits, not just the master, so that no worker carries on
    with arguments that were rejected.
    """
    if para.is_master():
        # NOTE(review): `parser` is not defined inside this block; it is
        # assumed to be a module-level argument parser -- confirm it exists
        # when `run` is called as a library function.
        parser.print_usage()
        logging.error(message)
    # Non-zero status: an error was just reported, so callers and job
    # schedulers must not see this as a successful run.
    sys.exit(1)


def run(
    mol2=None,
    smiles=None,
    standardise=STANDARDISE_DEF,
    num_conf=NUM_CONF_DEF,
    first=FIRST_DEF,
    pool_multiplier=POOL_MULTIPLIER_DEF,
    rmsd_cutoff=RMSD_CUTOFF_DEF,
    max_energy_diff=MAX_ENERGY_DIFF_DEF,
    forcefield=FORCEFIELD_DEF,
    seed=SEED_DEF,
    params=None,
    prioritize=False,
    out_dir=OUTDIR_DEF,
    compress=COMPRESS_DEF,
    overwrite=False,
    values_file=None,
    log=None,
    num_proc=None,
    parallel_mode=None,
    verbose=False,
):
    """Run conformer generation.

    Exactly one of `mol2` and `smiles` must be given. Molecules are read on
    the master process, optionally reordered, and passed through
    `Parallelizer.run_gen` to `generate_conformers`. If `values_file` is
    given, every result that is not ``False`` is written to an
    `HDF5Buffer` on the master process.

    Parameters
    ----------
    mol2, smiles : sequence or None
        Input files; each is unpacked into `mol2_generator` or
        `smiles_generator` respectively.
    standardise, num_conf, first, pool_multiplier, rmsd_cutoff,
    max_energy_diff, forcefield, seed
        Conformer generation settings. When `params` is given, ALL of these
        are replaced by the values read from it.
    params : optional
        Passed to `read_params`; see above.
    prioritize : bool
        If true, molecules are processed in order of increasing rotatable
        bond count, then increasing exact molecular weight.
    out_dir
        Output directory; created via `touch_dir` on the master process.
    compress, overwrite
        Forwarded to `generate_conformers`. `overwrite` also decides
        whether an existing `values_file` is appended to or replaced.
    values_file : optional
        HDF5 file that receives the results.
    log, verbose
        Forwarded to `setup_logging`.
    num_proc, parallel_mode
        Forwarded to `Parallelizer`.

    Raises
    ------
    ValueError
        If `forcefield` is not in `FORCEFIELD_CHOICES`.
    SystemExit
        With status 1 if the input arguments are inconsistent.
    """
    setup_logging(log, verbose=verbose)

    if params is not None:
        # A parameter file takes precedence over the keyword arguments.
        params = read_params(params)
        standardise = get_value(params, "preprocessing", "standardise", bool)
        num_conf = get_value(params, "conformer_generation", "num_conf", int)
        first = get_value(params, "conformer_generation", "first", int)
        pool_multiplier = get_value(
            params, "conformer_generation", "pool_multiplier", int
        )
        rmsd_cutoff = get_value(
            params, "conformer_generation", "rmsd_cutoff", float
        )
        max_energy_diff = get_value(
            params, "conformer_generation", "max_energy_diff", float
        )
        forcefield = get_value(params, "conformer_generation", "forcefield")
        seed = get_value(params, "conformer_generation", "seed", int)

    # check args
    if forcefield not in FORCEFIELD_CHOICES:
        raise ValueError(
            "Specified forcefield {} is not in valid options {!r}".format(
                forcefield, FORCEFIELD_CHOICES
            )
        )

    para = Parallelizer(num_proc=num_proc, parallel_mode=parallel_mode)

    # Check to make sure args make sense
    if mol2 is None and smiles is None:
        _exit_with_usage_error(
            para, "Please provide mol2 file or a SMILES file."
        )
    if mol2 is not None and smiles is not None:
        _exit_with_usage_error(
            para, "Please provide only a mol2 file OR a SMILES file."
        )
    # NOTE(review): `num_proc=0` is falsy and therefore passes this check;
    # left unchanged because 0 may be a deliberate "use default" value.
    if num_proc and num_proc < 1:
        _exit_with_usage_error(
            para, "Please provide more than one processor with `--num_proc`."
        )

    if para.is_master():
        # Set up input type. After the checks above exactly one of
        # `mol2` / `smiles` is set.
        if mol2 is not None:
            logging.info("Input type: mol2 file(s)")
            logging.info("Input file number: {:d}".format(len(mol2)))
            mol_iter = (
                mol_from_mol2(_mol2_file, _name, standardise=standardise)
                for _mol2_file, _name in mol2_generator(*mol2)
            )
        else:
            logging.info("Input type: Detected SMILES file(s)")
            logging.info("Input file number: {:d}".format(len(smiles)))
            mol_iter = (
                mol_from_smiles(_smiles, _name, standardise=standardise)
                for _smiles, _name in smiles_generator(*smiles)
            )

        if prioritize:
            logging.info(("Prioritizing mols with low rotatable bond number"
                          " and molecular weight first."))
            mols_with_properties = [
                (
                    AllChem.CalcNumRotatableBonds(mol),
                    AllChem.CalcExactMolWt(mol),
                    mol,
                )
                for mol in mol_iter
                if mol is not None
            ]
            # Sort on the two numeric properties only. Sorting the whole
            # tuple would compare the molecule objects whenever both
            # properties tie, which fails for objects without an ordering.
            mols_with_properties.sort(key=lambda entry: entry[:2])
            data_iterator = make_data_iterator(
                (entry[-1] for entry in mols_with_properties)
            )
        else:
            data_iterator = make_data_iterator(
                (mol for mol in mol_iter if mol is not None)
            )

        # Set up parallel-specific options
        logging.info("Parallel Type: {}".format(para.parallel_mode))

        # Set other options
        touch_dir(out_dir)

        # NOTE(review): this normalisation only happens on the master
        # process; other processes keep the raw `num_conf` -- confirm that
        # `generate_conformers` treats both forms the same.
        if not num_conf:
            num_conf = -1

        logging.info("Out Directory: {}".format(out_dir))
        logging.info("Overwrite Existing Files: {}".format(overwrite))
        if values_file is not None:
            if os.path.exists(values_file) and overwrite is not True:
                value_args = (values_file, "a")
                logging.info("Values file: {} (append)".format((values_file)))
            else:
                value_args = (values_file, "w")
                logging.info(
                    "Values file: {} (new file)".format((values_file))
                )
        if num_conf is None or num_conf == -1:
            logging.info("Target Conformer Number: auto")
        else:
            logging.info("Target Conformer Number: {:d}".format(num_conf))
        if first is None or first == -1:
            logging.info("First Conformers Number: all")
        else:
            logging.info("First Conformers Number: {:d}".format(first))
        logging.info("Pool Multiplier: {:d}".format(pool_multiplier))
        logging.info("RMSD Cutoff: {:.4g}".format(rmsd_cutoff))
        if max_energy_diff is None:
            logging.info("Maximum Energy Difference: None")
        else:
            logging.info("Maximum Energy Difference: {:.4g} kcal".format(
                max_energy_diff))
        logging.info("Forcefield: {}".format(forcefield.upper()))
        if seed != -1:
            logging.info("Seed: {:d}".format(seed))

        logging.info("Starting.")
    else:
        # Only the master process supplies work items.
        data_iterator = iter([])

    gen_conf_kwargs = {
        "out_dir": out_dir,
        "num_conf": num_conf,
        "rmsd_cutoff": rmsd_cutoff,
        "max_energy_diff": max_energy_diff,
        "forcefield": forcefield,
        "pool_multiplier": pool_multiplier,
        "first": first,
        "seed": seed,
        "save": True,
        "overwrite": overwrite,
        "compress": compress,
    }

    run_kwargs = {"kwargs": gen_conf_kwargs}

    results_iterator = para.run_gen(
        generate_conformers, data_iterator, **run_kwargs
    )

    hdf5_buffer = None
    if para.is_master() and values_file is not None:
        hdf5_buffer = HDF5Buffer(*value_args)

    try:
        # The iterator is consumed even when nothing is written, because
        # iterating it is the only thing this function does with the
        # results.
        for result, _ in results_iterator:
            if hdf5_buffer is not None and result is not False:
                values_to_hdf5(hdf5_buffer, result)
    finally:
        # Flush and close even if generation fails part-way, so buffered
        # values are not lost and the file handle is released.
        if hdf5_buffer is not None:
            try:
                hdf5_buffer.flush()
            finally:
                hdf5_buffer.close()