def _get_pg_thread(args):
    global data_ss
    threadobmol, terminating, PROCNAME = data_ss
    set_procname(PROCNAME + ".%d" % (os.getpid()))
    try:
        if not terminating.is_set():
            i, tolerance = args
            sym = maagbel.OBPointGroup()
            sym.Setup(threadobmol, i)
            pg = sym.IdentifyPointGroup(tolerance)
            del sym
    except KeyboardInterrupt:
        print(
            "Terminating worker process " + str(os.getpid()) + " prematurely.",
            file=sys.stderr,
        )
    return i, pg
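# Illustrative sketch (not part of the original module): how a worker like
# _get_pg_thread above is typically driven.  The initializer name and the
# Pool/Event wiring here are assumptions; only the (index, tolerance) argument
# tuple and the module-global ``data_ss`` follow from the code above.
def _example_pg_pool_init(obmol, term, name):
    # worker processes pick up their shared, read-only state from this global
    global data_ss
    data_ss = (obmol, term, name)


def _example_identify_pointgroups(threadobmol, nr_conformers, tolerance,
                                  procname="EnergyScan"):
    from multiprocessing import Event, Pool

    terminating = Event()
    pool = Pool(
        initializer=_example_pg_pool_init,
        initargs=(threadobmol, terminating, procname),
    )
    try:
        # one task per conformer index; imap preserves the input order
        results = list(
            pool.imap(_get_pg_thread,
                      [(i, tolerance) for i in range(nr_conformers)]))
    finally:
        pool.close()
        pool.join()
    # map conformer index -> point-group label
    return dict(results)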
def scan_main(parser): """Main control function for the scanning procedure. Args: parser: (of class ManipulateAggregates.collection.read.SectionlessConfigParser) contains information about the config file. Defines the methods "get_str", "get_int", "get_float" and "get_boolean" to get the appropriate data type. """ global grid set_procname(PROCNAME) gets = parser.get_str geti = parser.get_int getf = parser.get_float getb = parser.get_boolean do_calculate = not (getb("config_check")) # do some error checking # forcefield if gets("forcefield").lower() not in ["uff", "mmff94", "gaff", "ghemical"]: raise ValueError( 'Wrong foce field given. Only "uff", "mmff94", "gaff" and "ghemical" will be accepted.' ) temp_ff = OBForceField.FindType(gets("forcefield").lower()) if temp_ff is None: raise ValueError( "Somehow there was an error loading the forcefield %s (although it should be known to OpenBabel)." % (gets("forcefield").lower())) del temp_ff # boolean values for check_option in [ "save_dx", "save_aligned", "save_noopt", "save_opt", "correct", "sp_opt", "sp_correct", "sp_remove", "globalopt", "prealign", "gzipped", ]: getb(check_option) # remaining float values for check_option in [ "cutoff", "vdw_scale", "maxval", "cutoff", "vdw_scale" ]: getf(check_option) # remaining integer values for check_option in [ "columns", "optsteps", "progress", "hashwidth", "hashdepth" ]: geti(check_option) # check whether some options conflict if (gets("volumetric_data").startswith("from_scan,") and "minimasearch" in gets("jobtype").split(",") and not getb("save_dx")): print( "WARNING: a subsequent minimasearch tries to get its dx-files from this scan but", file=sys.stderr, ) print( " you requested not to save dx-files. This is probably an error (but not so if", file=sys.stderr, ) print( " you requested those dx-files to be used from a different directory) so please check.", file=sys.stderr, ) # initialize directory name hashing init_hashing(geti("hashdepth"), geti("hashwidth"), gets("hashalg")) # value for progress reports if geti("progress") not in [0, 1, 2]: raise ValueError( 'Wrong value for parameter "progress" given. 
Must be 0,1 or 2.') # populate all variables with the given values # read in the two molecules/aggregates from the given files mol1 = read_from_file(gets("geometry1"), ff=None) mol2 = read_from_file(gets("geometry2"), ff=None) # Compute radii of spheres that completely encompass both molecules to be able to # auto-adjust the gridsize mol1_vdw = mol1.get_vdw_radii() mol2_vdw = mol2.get_vdw_radii() mol1_coords = mol1.get_coordinates() mol2_coords = mol2.get_coordinates() mol1_center = numpy.mean(numpy.array(mol1_coords, dtype=float), axis=0) mol2_center = numpy.mean(numpy.array(mol2_coords, dtype=float), axis=0) # Will contain the radius of a sphere centered at the molecular center that # completely encompasses mol1 maxdist1 = 0.0 # Will contain the radius of a sphere centered at the molecular center that # completely encompasses mol2 maxdist2 = 0.0 distcutoff = getf("cutoff") vdwscale = getf("vdw_scale") for c1, vdw1 in zip(mol1_coords, mol1_vdw): npc1 = numpy.array(c1, dtype=float) if numpy.linalg.norm(npc1 - mol1_center) + (vdw1 * vdwscale) > maxdist1: maxdist1 = numpy.linalg.norm(npc1 - mol1_center) + (vdw1 * vdwscale) for c2, vdw2 in zip(mol2_coords, mol2_vdw): npc2 = numpy.array(c2, dtype=float) if numpy.linalg.norm(npc2 - mol2_center) + (vdw2 * vdwscale) > maxdist2: maxdist2 = numpy.linalg.norm(npc2 - mol2_center) + (vdw2 * vdwscale) # The radius of the sphere outside which no points need to be considered # with reduced precision and rounded up bigspherestr = "%.3f" % (maxdist1 + maxdist2 + distcutoff + 0.0005) bigsphererad = float(bigspherestr) # treat grid auto adjustments if gets("sp_gridtype") in ("full", "half"): # these are only the counts in one direction np_counts = numpy.array(list(map(int, gets("countsxyz").split(","))), dtype=int) np_del = numpy.array(list(map(float, gets("distxyz").split(","))), dtype=float) np_org = numpy.array([0, 0, 0], dtype=float) newdistxyz = numpy.array( list( map( lambda f: float("%.3f" % (f)), (bigsphererad + 0.0005) / (np_counts - 1), )), dtype=float, ) newcountsxyz = (numpy.array( list(map(int, (bigsphererad + 0.0005) / np_del)), dtype=int) + 1) distsdiffer = numpy.linalg.norm(newdistxyz - np_del) > 0.0 countsdiffer = numpy.linalg.norm(newcountsxyz - np_counts) > 0.0 tmpstring = "..." if gets("sp_autoadjust") == "distxyz": if distsdiffer: tmpstring += "changing grid size in some directions by adjusting grid spacing 'distxyz' by: " tmpstring += "x: %+.3f, y: %+.3f, z: %+.3f" % ( tuple(newdistxyz - np_del)) np_del = newdistxyz else: tmpstring += "grid dimensions need no adjustment" elif gets("sp_autoadjust") == "countsxyz": if countsdiffer: tmpstring += "changing grid size in some directions by adjusting number of points 'countsxyz' by: " tmpstring += "x: %+d, y: %+d, z: %+d" % (tuple(newcountsxyz - np_counts)) np_counts = newcountsxyz else: tmpstring += "grid dimensions need no adjustment" elif gets("sp_autoadjust") in ("", "none"): if countsdiffer or distsdiffer: tmpstring += "won't adjust, but grid dimensions inappropriate " tmpstring += "in some directions by (distxyz/countsxyz): " tmpstring += "x: %+.3f/%+d, y: %+.3f/%+d, z: %+.3f/%+d" % ( tuple(a for b in zip(newdistxyz - np_del, newcountsxyz - np_counts) for a in b)) else: tmpstring += "grid dimensions need no adjustment" else: raise ValueError("Wrong value for config value sp_autoadjust.") tmpstring += "..." 
print(tmpstring) with open(gets("sp_gridsave"), "w") as gf: writeto(gf, "TYPE %s" % (gets("sp_gridtype"))) if gets("sp_gridtype") == "half": writeto(gf, "%s" % (gets("halfspace"))) writeto(gf, "\n") writeto(gf, "COUNTS %d,%d,%d\n" % tuple(np_counts)) writeto(gf, "DIST %.3f,%.3f,%.3f\n" % tuple(np_del)) writeto(gf, "ORG %.3f,%.3f,%.3f\n" % tuple(np_org)) else: if not gets("sp_autoadjust") in ("", "none"): print( "WARNING: grid auto-adjustment not supported for current gridtype %s" % (gets("sp_gridtype")), file=sys.stderr, ) # spatial grid: check gridtype and set-up grid if gets("sp_gridtype") == "full": if do_calculate: np_grid = general_grid(np_org, np_counts, np_counts, np_del) dx_dict = { "filename": gets("suffix"), "counts": list(2 * np_counts + 1), "org": list(np_grid[0]), "delx": [np_del[0], 0.0, 0.0], "dely": [0.0, np_del[1], 0.0], "delz": [0.0, 0.0, np_del[2]], } dx_dict["save_dx"] = getb("save_dx") dx_dict["gzipped"] = getb("gzipped") else: gets("suffix") getb("save_dx") elif gets("sp_gridtype") == "half": np_counts_pos = numpy.array([c for c in np_counts]) np_counts_neg = numpy.array([c for c in np_counts]) halfspace_vec = list(map(int, gets("halfspace").split(","))) for i in (0, 1, 2): if halfspace_vec[i] < 0: np_counts_pos[i] = abs(halfspace_vec[i]) if halfspace_vec[i] > 0: np_counts_neg[i] = abs(halfspace_vec[i]) if do_calculate: np_grid = general_grid(np_org, np_counts_pos, np_counts_neg, np_del) dx_dict = { "filename": gets("suffix"), "counts": list(np_counts_pos + np_counts_neg + 1), "org": list(np_grid[0]), "delx": [np_del[0], 0.0, 0.0], "dely": [0.0, np_del[1], 0.0], "delz": [0.0, 0.0, np_del[2]], } dx_dict["save_dx"] = getb("save_dx") dx_dict["gzipped"] = getb("gzipped") else: gets("suffix") getb("save_dx") else: raise ValueError("Wrong value for config value sp_gridtype.") # check whether this gives an error restarted = len(gets("scan_restartdirs")) > 0 if restarted: olddirs = gets("scan_restartdirs").split(",") for d in olddirs: if not os.path.isdir(d): if do_calculate: print( "WARNING: directory supposed to contain dx files from previous runs %s does not exist. Skipping." % (d), file=sys.stderr, ) else: raise ValueError( "Directory supposed to contain dx files from previous runs %s does not exist." % (d)) # angular grid: check gridtype and set-up grid if gets("ang_gridtype") == "full": # these are the counts and distances for rotation countsposmain = numpy.array(list(map(int, gets("countspos").split(","))), dtype=int) countsnegmain = numpy.array(list(map(int, gets("countsneg").split(","))), dtype=int) distmain = numpy.array(list(map(float, gets("dist").split(","))), dtype=float) if do_calculate: np_rot = general_grid(numpy.array([0.0, 0.0, 0.0]), countsposmain, countsnegmain, distmain) else: raise ValueError("Wrong value for config value ang_gridtype.") partition = tuple(map(int, gets("partition").split("/"))) if len(partition) != 2: raise ValueError( "Format for 'partition' must be I1/I2 with I1 and I2 positive integers and I1<=I2" ) if partition[0] > partition[1] or partition[0] < 1 or partition[1] < 1: raise ValueError( "Format for 'partition' must be I1/I2 with I1 and I2 positive integers and I1<=I2" ) # Create a mask for points whose energy never has to be evaluated # A "False" associated with a point means "do not evaluate its energy". 
try: numpy.sqrt( numpy.einsum( "ij,ij->i", numpy.array([[1.0, 1.0, 0.0]]), numpy.array([[1.0, 1.0, 0.0]]), )) numpy.linalg.norm(numpy.array([[1.0, 1.0, 0.0]]), axis=1) except AttributeError: normaxisone = lambda array: numpy.apply_along_axis( numpy.linalg.norm, 1, array) except TypeError: normaxisone = lambda array: numpy.sqrt( numpy.einsum("ij,ij->i", array, array)) else: normaxisone = lambda array: numpy.linalg.norm(array, axis=1) mask = numpy.ones((len(np_grid), ), dtype=bool) dist = numpy.zeros((len(np_grid), ), dtype=float) origin = numpy.array([0.0, 0.0, 0.0], dtype=float) min1_vdw = min(mol1_vdw) min2_vdw = min(mol2_vdw) for c1, vdw1 in zip(mol1_coords, mol1_vdw): npc1 = numpy.array(c1, dtype=float) vdw = (vdw1 + min2_vdw) * vdwscale dist = normaxisone(np_grid - (npc1 - mol1_center)) # dist = numpy.linalg.norm((np_grid-(npc1-mol1_center)),axis=1) mask[dist < vdw] = False inmasked = numpy.sum(mask == False) dist = normaxisone(np_grid - origin) # dist = numpy.linalg.norm((np_grid-origin),axis=1) mask[dist > bigsphererad] = False outmasked = numpy.sum(mask == False) - inmasked print( "...computed mask, reduction in points: %.2f%%, inside: %.2f%%, outside: %.2f%%..." % ( 100.0 * (inmasked + outmasked) / len(mask), 100.0 * inmasked / len(mask), 100.0 * outmasked / len(mask), )) if not do_calculate: return # align the two molecules and append one to the other # after this, mol1 and mol2 can no longer be used obmol = prepare_molecules( mol1, mol2, gets("aligned_suffix"), save_aligned=getb("save_aligned"), align=getb("prealign"), ) # convert the grid to C data types grid = double_dist(np_grid) if restarted: print("This is a restarted run (old files are in: %s)" % (gets("scan_restartdirs"))) olddxfiles = get_old_dxfiles( gets("scan_restartdirs").split(","), gets("suffix")) print("Number of already existing dx files: %d" % (len(olddxfiles))) else: olddxfiles = {} # For every angle, scan the entire spatial grid and save # each optimum geometry if desired # Will also return a structure making it easy to find the optimum # for every spatial point transrot_result = _transrot_en( obmol, gets("forcefield").lower(), grid, np_rot, getf("maxval"), dx_dict, getb("correct"), getf("cutoff"), getf("vdw_scale"), report=geti("progress"), reportmax=len(np_rot), save_noopt=getb("save_noopt"), save_opt=getb("save_opt"), optsteps=geti("optsteps"), olddxfiles=olddxfiles, partition=partition, mask=mask, ) del grid # the grid in C data types is no longer needed since the scan has already been performed # Evaluate transrot_result to find the angular optimum for every # spatial grid point, if so desired if getb("sp_opt"): dx_dict["filename"] = gets("sp_opt_dx") dx_dict["save_dx"] = getb("sp_opt") _sp_opt( gets("sp_opt_dx"), gets("sp_opt_xyz"), gets("sp_opt_ang"), # filenames dx_dict, # data about the dx-file (header and how to save it) getb("sp_correct"), getb("sp_remove"), getf("maxval"), # data concerning postprocessing of energy data getb("globalopt"), # is the global optimum desired? obmol, np_grid, # data needed to print out xyz-files at the optimum geometries transrot_result, # see above )
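# Illustrative sketch (a standalone toy helper, not part of the original
# module) that mirrors the masking logic used in scan_main above: grid points
# that fall inside any scaled vdW sphere of molecule 1, or outside the sphere
# of radius bigsphererad around the origin, never need their energy evaluated.
# A recent numpy with linalg.norm(..., axis=1) is assumed here.
def _example_build_mask(np_grid, mol1_coords, mol1_vdw, mol1_center, min2_vdw,
                        vdwscale, bigsphererad):
    import numpy

    mask = numpy.ones((len(np_grid),), dtype=bool)
    for c1, vdw1 in zip(mol1_coords, mol1_vdw):
        npc1 = numpy.array(c1, dtype=float)
        # closer than the combined scaled radii means a guaranteed clash
        vdw = (vdw1 + min2_vdw) * vdwscale
        dist = numpy.linalg.norm(np_grid - (npc1 - mol1_center), axis=1)
        mask[dist < vdw] = False
    # farther from the origin than the encompassing sphere means the two
    # molecules can never come within the interaction cutoff
    dist = numpy.linalg.norm(np_grid - numpy.zeros(3), axis=1)
    mask[dist > bigsphererad] = False
    return mask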
def _transrot_en_process(args):
    """Each worker process executes this function.

    Args:
        args: (list) arguments to be passed to the worker processes (via pickling)
    """
    global data_s
    defaultobmol, transgrid, terminating, PROCNAME, mask = data_s
    if mask is not None:
        notmasked = lambda i: mask[i]
    else:
        notmasked = lambda i: True
    set_procname(PROCNAME + ".%d" % (os.getpid()))
    try:
        if not terminating.is_set():
            (
                a1,
                a2,
                a3,
                ffname,
                report,
                maxval,
                dx_dict,
                correct,
                savetemplate,
                templateprefix,
                anglecount,
                count,
                save_noopt,
                save_opt,
                optsteps,
                cutoff,
                vdw_scale,
                oldfile,
            ) = args
            angle_string = str(a1) + "," + str(a2) + "," + str(a3)
            angle_comment = "angles=(" + angle_string + ")"
            if oldfile is not None:
                compute = False
                try:
                    old = read_dx(
                        oldfile,
                        grid=False,
                        data=True,
                        silent=True,
                        comments=True,
                        gzipped=dx_dict["gzipped"],
                    )
                except ValueError as e:
                    print(
                        "Error when reading in old dx-file %s, recomputing. Error was:"
                        % (oldfile),
                        e,
                        file=sys.stderr,
                    )
                    compute = True
                if not compute:
                    old_a1, old_a2, old_a3 = list(
                        map(float, re.split(r",|\(|\)", old["comments"][0])[1:4]))
                    if not (a1, a2, a3) == (old_a1, old_a2, old_a3):
                        print(
                            "WARNING: old dx-file %s treated %s with index %d. This is also my index but I treat %s. Recomputing."
                            % (oldfile, old["comments"][0], anglecount, angle_comment),
                            file=sys.stderr,
                        )
                        compute = True
                    else:
                        energies = old["data"].tolist()
                        del old
                if not compute:
                    if not len(transgrid) == len(energies):
                        print(
                            "WARNING: old dx-file %s contains %d entries but the spatial grid is supposed to have %d entries. Recomputing."
                            % (oldfile, len(energies), len(transgrid)),
                            file=sys.stderr,
                        )
                        compute = True
            else:
                compute = True
            if compute or savetemplate:
                obmol = OBAggregate(defaultobmol)
                obff = OBForceField.FindForceField(ffname)
                rotfunc = obmol.RotatePart
                rotfunc(0, 1, a1)
                rotfunc(0, 2, a2)
                rotfunc(0, 3, a3)
            if compute:
                energies = _trans_en(
                    obmol,
                    obff,
                    transgrid,
                    maxval * 1.2,
                    cutoff,
                    vdw_scale,
                    report=report,
                    notmasked=notmasked,
                )
                if correct or dx_dict["save_dx"]:
                    # create a copy which can then be changed and possibly saved
                    tempenergies = copy.copy(energies)
                    if correct:
                        try:
                            actualmax = max(
                                (e for e in tempenergies if not e >= maxval))
                        except ValueError:
                            actualmax = maxval
                        tempenergies = [
                            actualmax if e >= maxval else e for e in tempenergies
                        ]
                    if dx_dict["save_dx"]:
                        print_dx_file(
                            str(anglecount) + "_",
                            True,
                            dx_dict,
                            tempenergies,
                            angle_comment,
                        )
                if correct or dx_dict["save_dx"]:
                    del tempenergies
            if savetemplate:
                minindex = energies.index(min(energies))
                template = grid[minindex][0]
                obmol.TranslatePart(0, template)
                if obmol.IsGoodVDW(vdw_scale):
                    if save_noopt:
                        filename = (templateprefix + str(anglecount) + "_" +
                                    angle_string + ".xyz")
                        pybel.Molecule(obmol).write("xyz", filename, overwrite=True)
                    if save_opt:
                        filename = (templateprefix + "opt_" + str(anglecount) + "_" +
                                    angle_string + ".xyz")
                        p_tempmol = pybel.Molecule(obmol)
                        p_tempmol.localopt(forcefield=ffname, steps=optsteps)
                        p_tempmol.write("xyz", filename, overwrite=True)
                # returning the molecule to its original state is not necessary since every worker process
                # creates its own instance and leaves the original one as is
    except KeyboardInterrupt:
        print(
            "Terminating worker process " + str(os.getpid()) + " prematurely.",
            file=sys.stderr,
        )
    return anglecount, (a1, a2, a3), energies, minindex
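# Illustrative sketch (a toy helper, not part of the original module) of the
# angle round-trip used by the worker above: each dx-file carries a comment of
# the form "angles=(a1,a2,a3)" which is read back by splitting on ',', '(' and ')'.
def _example_parse_angle_comment(comment):
    import re

    # "angles=(10.0,20.0,30.0)" -> ['angles=', '10.0', '20.0', '30.0', '']
    a1, a2, a3 = map(float, re.split(r",|\(|\)", comment)[1:4])
    return a1, a2, a3


# _example_parse_angle_comment("angles=(10.0,20.0,30.0)") == (10.0, 20.0, 30.0)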
the hashes of their names.

Usage: hashsort.py DIRECTORY [REGEX]

The mandatory first argument DIRECTORY must be a directory that already
exists. All files in this directory whose names match the regular expression
provided as the second, optional argument (default: '^[1-9][0-9]*_out.dx$')
will be sorted into subdirectories. This will use the MD5 hashing algorithm.
"""

# default process name
PROCNAME = "HashSort"
try:
    from FireDeamon import set_procname

    set_procname(PROCNAME)
except ImportError:
    set_procname = lambda s: None

# default regular expression
REGEX = "^[1-9][0-9]*_out.dx$"


def entrypoint():
    for arg in sys.argv:
        if arg == "--help" or arg == "-h":
            print(HELPTEXT)
            exit(0)
    # only DIRECTORY is mandatory, the regular expression is optional
    if len(sys.argv) < 2:
        raise ValueError("Not enough arguments given.")
    dir = sys.argv[1]
def _main(input_file):
    # default config
    config = DEFAULT_CONFIG
    options = [o for o in config] + list(
        set([mo for mopts in MANDATORY_OPTIONS.values() for mo in mopts]))
    parser = read_config_file(input_file, defaults=config, nocase=True)
    jobtype_list = parser.get_str("jobtype")
    # jobtypes have long and short names but both shall be treated the same so the
    # following is a mapping of the long and short forms to a unified form
    jobtype_dict = {
        "scan": "scan",
        "s": "scan",
        "minimasearch": "minima search",
        "ms": "minima search",
        "similarityscreening": "similarity screening",
        "ss": "similarity screening",
    }
    functions_dict = {
        "scan": scan_main,
        "minima search": minimasearch_main,
        "similarity screening": similarityscreening_main,
    }
    # Jobs have to be performed in a certain order to make sense. The order is given
    # in the following dictionary (starting at 0 and increasing):
    order_dict = {"scan": 0, "minima search": 1, "similarity screening": 2}
    try:
        # sort the jobtypes to be performed by an increasing order parameter
        jobtype_list = sorted(
            # since each job must not be performed multiple times, use a set to get the
            # irreducible set of jobtypes
            set(
                # create a list of tuples each consisting of the desired jobtype name
                # and its order parameter
                [(jobtype_dict[e.lower()], order_dict[jobtype_dict[e.lower()]])
                 for e in jobtype_list.split(",")]),
            key=operator.itemgetter(1),
        )
    except KeyError as e:
        raise ValueError(
            "Given short or long form does not match any known jobtype: %s" % (e))
    # check whether all mandatory options are present
    missing_options = []
    for jobtype, discard in jobtype_list:
        for opt in MANDATORY_OPTIONS[jobtype]:
            try:
                parser.get_str(opt)
            except NoOptionInConfigFileError:
                missing_options.append(opt)
    if len(missing_options) > 0:
        print(
            "ERROR: could not find the following mandatory options in the config file:",
            file=sys.stderr,
        )
        for o in missing_options:
            print(o, file=sys.stderr)
        raise NoOptionInConfigFileError("Incomplete input.")
    del missing_options
    unknown_options = parser.check_against(options)
    if len(unknown_options) > 0:
        print(
            "WARNING: the following are unknown lines in the config file:",
            file=sys.stderr,
        )
        for o in unknown_options:
            print(o, file=sys.stderr)
        print(file=sys.stderr)
    del unknown_options
    if parser.get_boolean("config_check"):
        print("This is a check of the config file.")
    for jobtype, __ in jobtype_list:
        jobtype_main = functions_dict[jobtype]
        print("Running %s..." % (jobtype))
        jobtype_main(parser)
        set_procname(PROCNAME)
        print("...finished %s\n" % (jobtype))
    if parser.get_boolean("config_check"):
        print("Config file seems fine.")
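# Illustrative sketch (a standalone toy example, not part of the original
# module) of the jobtype normalization performed in _main above: short and
# long names map to one canonical name, duplicates are removed via a set, and
# the jobs are sorted by their order parameter so that e.g. "ss,scan" still
# runs the scan before the similarity screening.
def _example_sort_jobtypes(jobtype_string):
    import operator

    jobtype_dict = {
        "scan": "scan", "s": "scan",
        "minimasearch": "minima search", "ms": "minima search",
        "similarityscreening": "similarity screening", "ss": "similarity screening",
    }
    order_dict = {"scan": 0, "minima search": 1, "similarity screening": 2}
    jobs = set(
        (jobtype_dict[e.lower()], order_dict[jobtype_dict[e.lower()]])
        for e in jobtype_string.split(","))
    return sorted(jobs, key=operator.itemgetter(1))


# _example_sort_jobtypes("ss,scan") -> [('scan', 0), ('similarity screening', 2)]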
def _minimasearch_process(args):
    """Each worker process executes this function.

    Args:
        args: (list) arguments to be passed to the worker processes (via pickling)
    """
    global data_ms
    c_neighbour_list, terminating, PROCNAME = data_ms
    set_procname(PROCNAME + ".%d" % (os.getpid()))
    try:
        if not terminating.is_set():
            (
                single_file,
                degeneration,
                nr_neighbours,
                progress,
                upper_cutoff,
                lower_cutoff,
                depths_sort,
                gzipped,
            ) = args
            try:
                temp = read_dx(
                    single_file,
                    grid=False,
                    data=True,
                    silent=True,
                    comments=True,
                    gzipped=gzipped,
                )
            except ValueError as e:
                print(
                    "Error when reading in dx-file %s, skipping. Error was:"
                    % (single_file),
                    e,
                    file=sys.stderr,
                )
                return None
            a1, a2, a3 = list(
                map(float, re.split(r",|\(|\)", temp["comments"][0])[1:4]))
            tempvalues = temp["data"]
            depths = []
            minima = LocalMinimaPy(
                c_neighbour_list,
                tempvalues,
                degeneration,
                nr_neighbours,
                prog_report=(progress == 1),
                upper_cutoff=upper_cutoff,
                lower_cutoff=lower_cutoff,
                sort_it=depths_sort,
                depths=depths,
            )
    except KeyboardInterrupt:
        print(
            "Terminating worker process " + str(os.getpid()) + " prematurely.",
            file=sys.stderr,
        )
    if depths_sort == 0:
        depths = [0.0] * len(minima)
    return minima, depths, [tempvalues[m] for m in minima], (a1, a2, a3)
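# Illustrative sketch (a toy consumer, not part of the original module)
# showing how a result tuple of _minimasearch_process is unpacked and
# sanity-checked, mirroring the checks performed in minimasearch_main below.
def _example_consume_minima_result(result):
    if result is None:
        return []  # the dx-file could not be read, skip it
    minima, depths, min_energies, (a1, a2, a3) = result
    lengths = list(map(len, (minima, depths, min_energies)))
    if min(lengths) <= 0:
        return []  # no minima were found in this dx-file
    if min(lengths) != max(lengths):
        raise RuntimeError("minima, depths and energies must have equal lengths")
    # one record per minimum: flat grid index, angles, energy and depth
    return [(m, (a1, a2, a3), e, d)
            for m, e, d in zip(minima, min_energies, depths)]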
def minimasearch_main(parser): """Main control function for the minima search procedure. Args: parser: (of class ManipulateAggregates.collection.read.SectionlessConfigParser) contains information about the config file. Defines the methods "get_str", "get_int", "get_float" and "get_boolean" to get the appropriate data type. """ set_procname(PROCNAME) gets = parser.get_str geti = parser.get_int getf = parser.get_float getb = parser.get_boolean do_calculate = not (getb("config_check")) # do some error checking # value for progress reports if geti("progress") not in [0, 1, 2]: raise ValueError( 'Wrong value for parameter "progress" given. Must be 0,1 or 2.') else: progress = geti("progress") # check whether partitioning over nodes was switched on if not gets("partition") == "1/1": raise ValueError( "Parallelization unequal 1/1 not suported for minima search.") # boolean values # NONE YET PRESENT FOR THIS JOBTYPE # string values if not gets("neighbour_check_type") in [ "eukledian", "manhattan_single", "manhattan_multiple", ]: raise ValueError( "Option neighbour_check must be 'eukledian', 'manhattan_single' or 'manhattan_multiple'." ) # float values (or lists of floats) cutoff_scale = getf("cutoff_scale") init_hashing(geti("hashdepth"), geti("hashwidth"), gets("hashalg")) for check_option in ["degeneration", "maxval", "depths_sort"]: getf(check_option) # check whether some options conflict # NO CONFLICTS KNOWN YET # spatial grid: check gridtype and set-up grid # read in parameters that are required in any case for the appropriate gridtypes if gets("sp_gridtype") in ("full", "half"): np_counts = numpy.array(list(map(int, gets("countsxyz").split(","))), dtype=int) np_del = numpy.array(list(map(float, gets("distxyz").split(","))), dtype=float) np_org = numpy.array([0, 0, 0], dtype=float) # treat auto-adjustment if not gets("sp_autoadjust") in ("", "none"): gfdict = {"TYPE": "%s" % (gets("sp_gridtype"))} # For each type of grid, define which parameters are allowed to be auto-adjusted # and how many can be adjusted at the same time. # Also, generate string representations of what you expect to find in the # gridfile in order to find out which parameters deviate. if gets("sp_gridtype") in ("full", "half"): gfdict["COUNTS"] = "%d,%d,%d" % tuple(np_counts) gfdict["DIST"] = "%.3f,%.3f,%.3f" % tuple(np_del) gfdict["ORG"] = "%.3f,%.3f,%.3f" % tuple(np_org) gfallowed = ("COUNTS", "DIST") gfmaxadjust = 1 else: raise ValueError( "WARNING: grid auto-adjustment not supported for current gridtype %s" % (gets("sp_gridtype"))) if gets("sp_gridtype") == "half": gfdict["TYPE"] += gets("halfspace") # Check whether file containing information about auto-adjustment exists and # whether too many deviations are found. try: with open(gets("sp_gridsave"), "r") as gf: deviations = 0 devstring = "" for line in gf: l = line.rstrip().split() if len(l) != 2: raise ValueError( "Gridtype file %s must not contain more or less than 2 columns per line." ) if l[0] in gfallowed: if gfdict[l[0]] != l[1]: deviations += 1 devstring += l[0] + " " gfdict[l[0]] = l[1] else: if gfdict[l[0]] != l[1]: raise ValueError( "Mandatory parameter %s is not equal for grid defined in config file and grid defined in grid file %s." % (l[0], gets("sp_gridsave"))) if deviations > gfmaxadjust: raise ValueError( "The grid used for the scan (see file %s) deviates from the one defined " % (gets("sp_gridsave")) + "in the config file by more than %d allowed parameter(s), namely %s. 
It should be:" % (gfmaxadjust, devstring), gfdict, ) elif deviations == 0: print( "...successfully read in grid used for the scan (was not auto-adjusted)..." ) else: print( "...successfully read in grid used for the scan (auto-adjusted in %d parameters: %s)..." % (deviations, devstring)) except (IOError, OSError) as e: raise IOError( "Auto-adjustment of spatial grid requested but file containing grid information %s could not be opened." % (gets("sp_gridsave")), e, ) # For each gridtype, use the deviating values. if gets("sp_gridtype") in ("full", "half"): for ds in devstring.split(): if ds == "COUNTS": np_counts = numpy.array(list( map(int, gfdict[ds].split(","))), dtype=int) elif ds == "DIST": np_del = numpy.array(list(map(float, gfdict[ds].split(","))), dtype=float) if gets("sp_gridtype") == "full": gets("suffix") elif gets("sp_gridtype") == "half": np_counts_pos = numpy.array([c for c in np_counts]) np_counts_neg = numpy.array([c for c in np_counts]) halfspace_vec = list(map(int, gets("halfspace").split(","))) for i in (0, 1, 2): if halfspace_vec[i] < 0: np_counts_pos[i] = abs(halfspace_vec[i]) if halfspace_vec[i] > 0: np_counts_neg[i] = abs(halfspace_vec[i]) gets("suffix") else: raise ValueError("Wrong value for config value sp_gridtype.") if gets("distance_cutoff") == "auto": if (gets("neighbour_check_type") == "eukledian" or gets("neighbour_check_type") == "manhattan_single"): raise ValueError( "Value 'auto' for option 'distance_cutoff' only supported for 'manhattan_multiple' and grids 'full' or 'half'." ) elif gets("neighbour_check_type") == "manhattan_multiple": if gets("sp_gridtype") in ("full", "half"): distance_cutoff = list(cutoff_scale * np_del) else: raise ValueError( "Value 'auto' for option 'distance_cutoff' only supported for grids 'full' or 'half'." ) else: raise Exception("Wrong value for option 'neighbour_check_type'.") else: if (gets("neighbour_check_type") == "eukledian" or gets("neighbour_check_type") == "manhattan_single"): distance_cutoff = getf("distance_cutoff") elif gets("neighbour_check_type") == "manhattan_multiple": try: distance_cutoff = list( map( lambda s: cutoff_scale * float(s), gets("distance_cutoff").split(","), )) except ValueError: raise TypeError( "Each element of option distance_cutoff must be of type float." ) if len(distance_cutoff) != 3: raise ValueError( "Option 'distance_cutoff' must have three entries for 'manhattan_multiple'." ) else: raise Exception("Wrong value for option 'neighbour_check_type'.") # get number of neighbours to search nr_shells = None if gets("nr_neighbours") == "auto": if gets("neighbour_check_type") == "manhattan_multiple" and gets( "sp_gridtype") in ("full", "half"): nr_neighbours = [ 2 * int(1.0 * distance_cutoff[i] / np_del[i]) + 1 for i in range(3) ] tmp = nr_neighbours[0] if all((i == tmp for i in nr_neighbours)): nr_shells = int((tmp - 1) // 2) nr_neighbours = nr_neighbours[0] * nr_neighbours[ 1] * nr_neighbours[2] - 1 else: raise ValueError( "Value 'auto' for 'nr_neighbours' only supported for 'manhattan_multiple' and the sp_gridtypes 'full' and 'half'." 
) else: nr_neighbours = geti("nr_neighbours") if gets("max_nr_neighbours") == "auto": max_nr_neighbours = nr_neighbours else: max_nr_neighbours = geti("max_nr_neighbours") if gets("volumetric_data").startswith("from_scan,"): config_data = gets("volumetric_data").split(",") if len(config_data) == 2: # angular grid: check gridtype and set-up grid if gets("ang_gridtype") == "full": # these are the counts and distances for rotation countsposmain = numpy.array(list( map(int, gets("countspos").split(","))), dtype=int) countsnegmain = numpy.array(list( map(int, gets("countsneg").split(","))), dtype=int) totcounts = countsposmain + countsnegmain + 1 nr_dx_files = 1 for c in totcounts: nr_dx_files *= c else: raise ValueError( "Option 'volumetric_data' of 'from_scan' only supported for 'ang_gridtype'=='full'" ) filenames = [] reuse_ids = {} for f in range(1, nr_dx_files + 1): reuse_ids[f] = True # reuse_ids = {f:True for f in range(1,nr_dx_files+1)} if len(gets("scan_restartdirs")) > 0: print( "...checking which dx-files are present in old directories..." ) olddxfiles = get_old_dxfiles( gets("scan_restartdirs").split(","), gets("suffix")) for c in olddxfiles: reuse_ids[c] = False # reuse_ids.update({c:False for c in olddxfiles}) filenames += [olddxfiles[c] for c in olddxfiles] discard, directory = config_data if os.path.isdir(directory): print( "...checking which dx-files are present in directory created by a previous scan..." ) filenames += [ hashIO.hashpath(directory + os.sep + str(f) + "_" + gets("suffix")) for f in reuse_ids if reuse_ids[f] ] else: print( "WARNING: directory %s that should contain old dx-files does not exist." % (directory), file=sys.stderr, ) print("...determining which files are missing...") dx_files = sorted( [ f for f in filenames if os.path.exists(f) and os.stat(f).st_size > 0 ], key=str.lower, ) missing_dx_files = sorted( [ f.split(os.sep)[-1] for f in filenames if not os.path.exists(f) or os.stat(f).st_size <= 0 ], key=str.lower, ) if len(dx_files) != nr_dx_files: print( "WARNING: some files that were expected to be generated by the scan in directory '%s' are missing" % (directory), file=sys.stderr, ) if len(gets("scan_restartdirs")) > 0: print( " and could not be supplied from previous runs in the directories: %s" % (gets("scan_restartdirs")), file=sys.stderr, ) print( "Missing files: " + ", ".join( sorted( missing_dx_files, key=lambda e: # e.split(os.sep)[-1] is the name of the file (NAME) # NAME.split("_"+gets("suffix"))[0] is the numer of the dx-file int( e.split(os.sep)[-1].split("_" + gets("suffix")) [0]), )), file=sys.stderr, ) else: raise ValueError( 'Wrong format for parameter "volumetric_data" starting with "from_scan" given. Must be "from_scan,DIR".' ) elif gets("volumetric_data").startswith("dir_regex,"): config_data = gets("volumetric_data").split(",") if len(config_data) == 3: discard, directory, regex = config_data if os.path.isdir(directory): dx_files = [ f for f in hashIO.listfiles( directory, regex, nullsize=False, nulldepth=False) ] else: raise ValueError("Given directory " + directory + " is not a directory.") else: raise ValueError( 'Wrong format for parameter "volumetric_data" starting with "dir_regex" given. Must be "dir_regex,DIR,REGEX".' ) else: raise ValueError( 'Wrong value for parameter "volumetric_data" given. Must be "from_scan,DIR" or "dir_regex,DIR,REGEX".' 
) use_regular = ((nr_shells is not None) and (gets("sp_gridtype") in ("full", "half")) and (gets("neighbour_check_type") == "manhattan_multiple") and (gets("nr_neighbours") == "auto")) if use_regular: if gets("max_nr_neighbours") != "auto": print( "WARNING: the value for 'max_nr_neighbours' is not used for regular grids.", file=sys.stderr, ) print( "...using fast neighbour-search algorithm for regular grid with %d neighbour shells..." % (nr_shells)) else: print("...using slow neighbour-search algorithm for irregular grid...") print("Conditions not fulfilled for fast algorithm:") regular_dict = { "sp_gridtype in ('full','half')": gets("sp_gridtype") in ("full", "half"), "neighbour_check_type == manhattan_multiple": gets("neighbour_check_type") == "manhattan_multiple", "nr_neighbours == auto": gets("nr_neighbours") == "auto", } tmpstring = "int(distance_cutoff[i]/distxyz[i]) not the same for i in {0,1,2}" if (gets("sp_gridtype") in ("full", "half") and gets("neighbour_check_type") == "manhattan_multiple" and gets("nr_neighbours") == "auto"): regular_dict[tmpstring] = nr_shells is not None else: regular_dict[tmpstring] = True for reason in regular_dict: if not regular_dict[reason]: print(" %s" % (reason)) geti("pool_chunksize") if not do_calculate: if len(dx_files) == 0: print( "WARNING: some of the dx-files you requested are currently not present, which might.", file=sys.stderr, ) print( " not be a problem since this is only a config check.", file=sys.stderr, ) return if len(dx_files) == 0: raise RuntimeError( "Could not find any non-empty dx-files matching the given criteria." ) # sorting the dx-files by name to always get a sorted minima file dx_files = sorted( dx_files, key=lambda e: # e.split(os.sep)[-1] is the name of the file (NAME) # NAME.split("_"+gets("suffix"))[0] is the numer of the dx-file int(e.split(os.sep)[-1].split("_" + gets("suffix"))[0]), ) print("...sorted list of dx-files...") if gets("sp_gridtype") == "full": np_grid = general_grid(np_org, np_counts, np_counts, np_del) elif gets("sp_gridtype") == "half": np_grid = general_grid(np_org, np_counts_pos, np_counts_neg, np_del) else: raise ValueError("Wrong value for config value sp_gridtype.") pos_from_index = lambda index: np_grid[index] if use_regular: if gets("sp_gridtype") == "full": FD_nr_points = list(map(lambda c: 2 * c + 1, np_counts)) elif gets("sp_gridtype") == "half": FD_nr_points = list(np_counts_pos + np_counts_neg + 1) else: raise ValueError("Wrong value for config value sp_gridtype.") c_neighbour_list = RegularNeighbourListPy(FD_nr_points, int(nr_shells), prog_report=False, exclude_border=True) else: c_neighbour_list = IrregularNeighbourListPy( np_grid, nr_neighbours, distance_cutoff, max_nr_neighbours=max_nr_neighbours, prog_report=(progress == 1), cutoff_type=gets("neighbour_check_type"), sort_it=False, ) print("...generated neighbour list...") try: nr_threads = int(os.environ["OMP_NUM_THREADS"]) except KeyError: nr_threads = 1 except ValueError: nr_threads = 1 # how to properly handle keyboard interrupts when multi processing has been taken from: # http://stackoverflow.com/questions/14579474/multiprocessing-pool-spawning-new-childern-after-terminate-on-linux-python2-7 terminating = Event() # global data_ms #DEBUG # data_ms = (c_neighbour_list, terminating) #DEBUG args = [[ single_file, getf("degeneration"), nr_neighbours, progress, getf("maxval"), None, geti("depths_sort"), getb("gzipped"), ] for single_file in dx_files] if not gets("minima_file_save").endswith(".gz"): minima_file = 
io.open(gets("minima_file_save"), "wb") else: try: from subprocess import Popen, PIPE gzipprocess = Popen( ["gzip", "-6", "-c", "-"], stdin=PIPE, stdout=io.open(gets("minima_file_save"), "wb"), bufsize=4096, ) minima_file = gzipprocess.stdin except ImportError: print( "WARNING: cannot import gzip module, will treat %s as a non-gzipped one." % (gets("minima_file_save")[0:-3]), file=sys.stderr, ) minima_file = io.open(gets("minima_file_save")[0:-3], "wb") except OSError: print( "WARNING: cannot import gzip module, will treat %s as a non-gzipped one." % (filename), file=sys.stderr, ) minima_file = io.open(gets("minima_file_save")[0:-3], "wb") minima_file.write( hashstring("#%s FF: %s\n" % (gets("minima_file_save"), gets("forcefield")))) dx_file_count = 0 dx_file_max = len(args) try: chunksize = geti("pool_chunksize") while dx_file_count < dx_file_max: pool = Pool( nr_threads, initializer=_minimasearch_parallel_init, initargs=(c_neighbour_list, terminating, PROCNAME), ) # NODEBUG chunkstart = dx_file_count if chunkstart + chunksize > dx_file_max: chunkend = dx_file_max else: chunkend = chunkstart + chunksize # loop over all dx-files via worker processes for temp in pool.imap(_minimasearch_process, args[chunkstart:chunkend]): # NODEBUG # for arg in args: #DEBUG dx_file_count += 1 # temp = _minimasearch_process(arg) #DEBUG if temp is None: if progress > 0: print("Skipping dx-file %d of %d: read error" % (dx_file_count, dx_file_max)) continue minima, depths, min_energies, (a1, a2, a3) = temp tmplen = list(map(len, (minima, depths, min_energies))) if min(tmplen) <= 0: if progress > 0: print("Skipping dx-file %d of %d: no minima found" % (dx_file_count, dx_file_max)) continue if not (min(tmplen) == max(tmplen)): if progress > 0: print( "Error while processing dx-file %d of %d" % (dx_file_count, dx_file_max), file=sys.stderr, ) raise RuntimeError( "Error while processing dx-file %d of %d: lists do not have equal lengths" % (dx_file_count, dx_file_max)) if progress > 0: print("Processing dx-file %d of %d: %d minima" % (dx_file_count, dx_file_max, len(minima))) for minimum, depth, min_energy in zip(minima, depths, min_energies): min_pos = pos_from_index(minimum) minima_file.write( hashstring( "%10d %15.8f %15.8f %15.8f %15.8f %15.8f %15.8f %15.8E %E \n" % ( minimum, min_pos[0], min_pos[1], min_pos[2], a1, a2, a3, min_energy, depth, ))) del ( minimum, depth, min_energy, minima, depths, min_energies, a1, a2, a3, temp, min_pos, ) pool.close() # NODEBUG pool.join() # NODEBUG except KeyboardInterrupt as e: print("Caught keyboard interrupt.", file=sys.stderr) pool.terminate() # NODEBUG pool.join() # NODEBUG print("Terminating main routine prematurely.", file=sys.stderr) minima_file.close() raise e minima_file.close() if gets("minima_file_save").endswith(".gz"): gzipprocess.wait()
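# Illustrative sketch (a standalone helper, not part of the original module)
# documenting the minima-file record written by minimasearch_main above: each
# non-comment line holds nine whitespace-separated fields, namely the flat
# grid index, the displacement (x, y, z), the three rotation angles, the
# energy at the minimum and the depth of the minimum.
def _example_parse_minima_line(line):
    fields = line.split()
    index = int(fields[0])
    displacement = tuple(map(float, fields[1:4]))
    angles = tuple(map(float, fields[4:7]))
    energy, depth = float(fields[7]), float(fields[8])
    return index, displacement, angles, energy, depth


# _example_parse_minima_line(
#     "   42   1.0   2.0   3.0   10.0   20.0   30.0   -1.5E-01   1E-02")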
def similarityscreening_main(parser): """Main control function for the similarity screening procedure. Args: parser: (of class ManipulateAggregates.collection.read.SectionlessConfigParser) contains information about the config file. Defines the methods "get_str", "get_int", "get_float" and "get_boolean" to get the appropriate data type. """ set_procname(PROCNAME) gets = parser.get_str geti = parser.get_int getf = parser.get_float getb = parser.get_boolean do_calculate = not (getb("config_check")) # value for progress reports if geti("progress") not in [0, 1, 2]: raise ValueError( 'Wrong value for parameter "progress" given. Must be 0,1 or 2.') progress = geti("progress") # check whether partitioning over nodes was switched on if not gets("partition") == "1/1": raise ValueError( "Parallelization unequal 1/1 not suported for similarity screening." ) mol1 = read_from_file(gets("geometry1"), ff=None) mol2 = read_from_file(gets("geometry2"), ff=None) nr_ats1 = mol1.obmol.NumAtoms() nr_ats2 = mol2.obmol.NumAtoms() obmol = prepare_molecules(mol1, mol2, align=getb("prealign")) std_map = maagbel.StdMapStringString() # add the appropriate configuration paramters to the std::map<std::string,std::string> std_map["ffname"] = gets("forcefield") # try to find the chosen force field if gets("forcefield").lower() not in ["uff", "mmff94", "gaff", "ghemical"]: raise ValueError( 'Wrong force field given. Only "uff", "mmff94", "gaff" and "ghemical" will be accepted.' ) temp_ff = maagbel.OBForceField.FindType(gets("forcefield").lower()) if temp_ff is None: raise RuntimeError( "Somehow there was an error loading the forcefield %s (although it should be known to OpenBabel)." % (gets("forcefield").lower())) try: if getf("energy_cutoff") >= 0: if getb("use_ff_units"): print( "...using given energy in force field units: %.6f %s (equals %.6f meV)" % ( getf("energy_cutoff"), temp_ff.GetUnit(), getf("energy_cutoff") / E_UNIT_CONVERSION[temp_ff.GetUnit()], )) std_map["ecutoff"] = str(getf("energy_cutoff")) else: std_map["ecutoff"] = str( getf("energy_cutoff") * E_UNIT_CONVERSION[temp_ff.GetUnit()]) print( "...converting given energy cutoff to force field units: %s meV -> %.6f %s" % ( gets("energy_cutoff"), getf("energy_cutoff") * E_UNIT_CONVERSION[temp_ff.GetUnit()], temp_ff.GetUnit(), )) else: std_map["ecutoff"] = str(-100) except KeyError as e: raise RuntimeError( "Unknown unit type '%s' of the chosen force field '%s', cannot convert the energy cutoff in meV to that unit. KeyError was: %s. Known units are: %s" % ( temp_ff.GetUnit(), gets("forcefield").lower(), e, ", ".join([t for t in E_UNIT_CONVERSION]), )) finally: del temp_ff postalign = getb("postalign") geti("symprec") geti("maxscreensteps") if not (gets("consider_h1") == "" and (gets("consider_h2") in ("", "SAME"))): # treat 'consider_h1' if len(gets("consider_h1")) != 0: try: tmp_h1 = list(map(int, gets("consider_h1").split(","))) except ValueError as e: raise ValueError( "Could not parse consider_h1, must be comma-separated ints." ) if len(tmp_h1) != 0: if min(tmp_h1) < 1 or max(tmp_h1) > nr_ats1: raise ValueError( "Indices for consider_h1 must be >=%d and <=%d" % (1, nr_ats1)) else: tmp_h1 = [] # treat 'consider_h2' if gets("consider_h2") == "SAME": if gets("geometry1") != gets("geometry2") and len(tmp_h1) != 0: raise ValueError( "Can only use 'SAME' for consider_h2 when geometry1 and geometry2 are identical." 
) else: tmp_h2 = [i + nr_ats1 for i in tmp_h1] elif len(gets("consider_h2")) != 0: try: tmp_h2 = list( map(lambda s: int(s) + nr_ats1, gets("consider_h2").split(","))) except ValueError as e: raise ValueError( "Could not parse 'consider_h2', must be comma-separated ints." ) if (len(tmp_h2) != 0 and min(tmp_h2) < nr_ats1 + 1 or max(tmp_h2) > nr_ats1 + nr_ats2): raise ValueError( "Indices for consider_h1 must be >=%d and <=%d" % (1, nr_ats2)) else: tmp_h2 = [] important_hs = ",".join(map(str, tmp_h1 + tmp_h2)) std_map["imp-H"] = important_hs pgstep = -1 if getb("pointgroups"): if gets("pgstep") == "last": pgstep = gets("pgstep") elif gets("pgstep") == "first": pgstep = 1 else: pgstep = geti("pgstep") if pgstep < 0: raise ValueError("The given pgstep must be >=0.") getb("subgroups") getb("exclude_c1") if getb("pgwrite"): pgfilename = gets("pgfile") else: pgfilename = None if len(gets("pgregex")) != 0: pgregex = re.compile(gets("pgregex")) else: pgregex = None if pgstep != -1 and (len(gets("pgregex")) == 0 and not getb("pgwrite")): pgstep = -1 print( "WARNING: pgwrite is False and no pgregex given -> pointgroups will not be determined in step " + gets("pgstep"), file=sys.stderr, ) if not do_calculate: return # to avoid segfaults, define some bogus input parameters that would normally be # given via the command-line in_out_options = maagbel.OBConversion() in_out_options.SetInFormat("nul") in_out_options.SetOutFormat("nul") # create a new OBAggregate instance that can contain a single aggregate and # that will walk through all the minima that were found. Each of these geometries # will be added to obmol as a new conformer so that the OBOp SimSearch can # perform its screening duty saveobmol = maagbel.OBAggregate(obmol) # copy constructor obmol.DeleteConformers(0, obmol.NumConformers() - 1) # clean all conformer information if obmol.NumConformers() != 0: raise RuntimeError( "Could not clear conformer information, still %d left." % (obmol.NumConformers())) tempmol = maagbel.OBAggregate(saveobmol) sameff = True gzipped = gets("minima_file_load").endswith(".gz") minima_file = gets("minima_file_load") if gzipped: if not (os.path.exists(gets("minima_file_load"))) and os.path.exists( gets("minima_file_load")[0:-3]): print( "WARNING: could not find gzipped minima file but non-gzipped one. 
Will use the latter.", file=sys.stderr, ) catproc = Popen(["cat", gets("minima_file_load")[0:-3]], stdout=PIPE) minima_file = gets("minima_file_load")[0 - 3] gzipped = False else: catproc = Popen(["zcat", gets("minima_file_load")], stdout=PIPE) gzipped = True else: catproc = Popen(["cat", gets("minima_file_load")], stdout=PIPE) minima_file = gets("minima_file_load") gzipped = False awkproc = Popen(["awk", '{print $5 " " $6 " " $7}'], stdin=catproc.stdout, stdout=PIPE) uniqproc = Popen(["uniq"], stdin=awkproc.stdout, stdout=PIPE) f = uniqproc.stdout # angles should be in a monotonically nondecreasing order angles = [ tuple(map(float, line.rstrip().split()[0:3])) for line in (tobasestring(bl) for bl in f) if not line.startswith("#") and len(line.split()) == 3 ] if not angles == list(sorted(angles)): print( "WARNING: minima file was not in sorted order with respect to the angles.", file=sys.stderr, ) print( " Beware that results might change slightly if the order is changed.", file=sys.stderr, ) del angles f.close() catproc.wait() awkproc.wait() uniqproc.wait() old_angles = (-float("inf"), -float("inf"), -float("inf")) ang = [0.0, 0.0, 0.0] # current angles disp = [0.0, 0.0, 0.0] # current displacement if progress > 0: print("...adding minima geometries to data structure...") printcount = 0 if gzipped: f = gziplines(minima_file) else: f = open(gets("minima_file_load"), "r") transfunc = tempmol.TranslatePart rotfunc = tempmol.RotatePart coordfunc = tempmol.GetCoordinates for line in f: if not line.startswith("#"): linevals = line.rstrip().split() disp = list(map(float, linevals[1:4])) pos_disp = double_array(disp) neg_disp = double_array([-v for v in disp]) ang = tuple(map(float, linevals[4:7])) if ang != old_angles: if progress > 0 and printcount % 10 == 0: print( ERASE_LINE + "...re-creating aggregate with new angles: (%8.2f,%8.2f,%8.2f)..." % ang + CURSOR_UP_ONE) printcount = 0 printcount += 1 tempmol.Assign(saveobmol) # since python needs quite some time to access an objects member, saving # a member saves time a1, a2, a3 = ang old_angles = ang rotfunc(0, 1, a1) rotfunc(0, 2, a2) rotfunc(0, 3, a3) transfunc(0, pos_disp) # actually deep-copy the new coordinates to avoid segfaults obmol.AddConformer(coordfunc(), True) transfunc(0, neg_disp) else: l = line.split() if len(l) >= 3 and l[1] == "FF:": print( "\n...determining force field used to create the minima file %s..." % (gets("minima_file_load"))) if l[2].lower() != gets("forcefield").lower(): print( "...old force field '%s' is not the same as the current one '%s'..." % (l[2].lower(), gets("forcefield").lower())) sameff = False else: print( "...minima file was created using the current force field..." ) sameff = True f.close() if progress > 0: print() print("...%d aggregates have been processed..." 
% (obmol.NumConformers())) if obmol.NumConformers() <= 0: print( "\n...not a single conformer was processed, hence we're done...\n") return # force maagbel to be verbose if detailed progress reports were requested if progress == 1: std_map["verbose"] = "true" simscreen = maagbel.OBOp.FindType("simscreen") prescreen = False screenstring = "" if geti("symprec") >= 0: if prescreen: screenstring += "and " prescreen = True screenstring += "symmetry " std_map["prec"] = str(geti("symprec")) # align all aggregates with their centers to (0,0,0) and their third and second # main axes to the x axis and the y axis, respectively, to improve symmetry # screening success std_map["ssalign"] = "b" if getf("energy_cutoff") > 0: if prescreen: screenstring += "and " screenstring += "energy " prescreen = True else: std_map.erase("ecutoff") step = 0 if pgstep == step: nr_aggs = obmol.NumConformers() obmol = _get_pg( obmol, saveobmol, getb("subgroups"), not (getb("exclude_c1")), pgfilename, progress, postalign, pgregex, ) if obmol.NumConformers() > nr_aggs: raise RuntimeError( "Number of conformers increased (%d -> %d) during pointgroup screening." % (nr_aggs, obmol.NumConformers())) if prescreen: nr_aggs = obmol.NumConformers() step += 1 # First, only sort out those aggregates that do not pass the energy and symmetry # filter. if progress > 0: print("\n...starting " + screenstring + "pre-screening...\n") # perform the pre-screening success = simscreen.Do(obmol, "", std_map, in_out_options) if obmol.NumConformers() > nr_aggs: raise RuntimeError( "Number of conformers increased (%d -> %d) during symmetry screening." % (nr_aggs, obmol.NumConformers())) if not success: raise RuntimeError( "Error executing the SimScreen OBOp in OpenBabel.") if progress > 0: print("...%d aggregates passed %sfilter...\n\n" % (obmol.NumConformers(), screenstring)) # energy and symmetry screening have already been performed if they were desired # so do not do that again std_map.erase("ecutoff") std_map.erase("ssalign") std_map.erase("prec") else: print("\n...skipping energy and symmetry pre-screening...\n") if prescreen and pgstep == step: nr_aggs = obmol.NumConformers() obmol = _get_pg( obmol, saveobmol, getb("subgroups"), not (getb("exclude_c1")), pgfilename, progress, postalign, pgregex, ) if obmol.NumConformers() > nr_aggs: raise RuntimeError( "Number of conformers increased (%d -> %d) during pointgroup screening." % (nr_aggs, obmol.NumConformers())) success = True maxstep = geti("maxscreensteps") # screen until fewer than nr_geometries agregates are left rmsd = getf("rmsd_min") rmsdstep = getf("rmsd_step") maxagg = geti("nr_geometries") while success and obmol.NumConformers() > maxagg and step < maxstep: step += 1 nr_aggs = obmol.NumConformers() std_map["rcutoff"] = str(rmsd) success = simscreen.Do(obmol, "", std_map, in_out_options) if obmol.NumConformers() > nr_aggs and success: raise RuntimeError( "Number of conformers increased (%d -> %d) during screening step %d." % (nr_aggs, obmol.NumConformers(), step)) if progress > 0: print( "...%d aggregates passed screening step %d at rmsd %f...\n\n" % (obmol.NumConformers(), step, rmsd)) if pgstep == "last": nr_aggs = obmol.NumConformers() obmol = _get_pg( obmol, saveobmol, getb("subgroups"), not (getb("exclude_c1")), pgfilename, progress, postalign, pgregex, ) if obmol.NumConformers() > nr_aggs: raise RuntimeError( "Number of conformers increased (%d -> %d) during pointgroup screening." 
% (nr_aggs, obmol.NumConformers())) rmsd += rmsdstep if pgstep == step: nr_aggs = obmol.NumConformers() obmol = _get_pg( obmol, saveobmol, getb("subgroups"), not (getb("exclude_c1")), pgfilename, progress, postalign, pgregex, ) if obmol.NumConformers() > nr_aggs: raise RuntimeError( "Number of conformers increased (%d -> %d) during pointgroup screening." % (nr_aggs, obmol.NumConformers())) if not success: raise RuntimeError("Error executing the SimScreen OBOp in OpenBabel.") if step >= maxstep: print( "WARNING: maximum number of similarity screening steps exceeded", file=sys.stderr, ) if success: if progress > 0: print("...%d aggregates passed screening..." % (obmol.NumConformers())) # write all conformers that passed the filter to file if progress > 0: print("...writing %d aggregates to file %s..." % (obmol.NumConformers(), gets("screened_xyz"))) writefile = pybel.Outputfile("xyz", gets("screened_xyz"), overwrite=True) pybelmol = pybel.Molecule(obmol) nr_conformers = obmol.NumConformers() commentfunc = obmol.SetTitle setconffunc = obmol.SetConformer if postalign: alignfunc = obmol.Align aligncenter = double_array([0.0, 0.0, 0.0]) alignaxis1 = double_array([1.0, 0.0, 0.0]) alignaxis2 = double_array([0.0, 1.0, 0.0]) for conf in range(nr_conformers): commentfunc("Conformer %d/%d" % (conf + 1, nr_conformers)) setconffunc(conf) if postalign: alignfunc(aligncenter, alignaxis1, alignaxis2) writefile.write(pybelmol) writefile.close() if pgstep == "last": nr_aggs = obmol.NumConformers() obmol = _get_pg( obmol, saveobmol, getb("subgroups"), not (getb("exclude_c1")), pgfilename, progress, postalign, pgregex, ) if obmol.NumConformers() > nr_aggs: raise RuntimeError( "Number of conformers increased (%d -> %d) during pointgroup screening." % (nr_aggs, obmol.NumConformers()))
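# Illustrative sketch (``screen`` is a stand-in for the SimScreen OBOp call
# used above and is an assumption; it must return the number of conformers
# surviving a given RMSD cutoff).  It isolates the control flow of the final
# screening loop: the RMSD threshold starts at rmsd_min and grows by rmsd_step
# each round until at most nr_geometries aggregates remain or maxscreensteps
# rounds have been performed.
def _example_rmsd_screening_loop(screen, nr_conformers, rmsd, rmsdstep, maxagg,
                                 maxstep):
    step = 0
    while nr_conformers > maxagg and step < maxstep:
        step += 1
        nr_conformers = screen(rmsd)
        rmsd += rmsdstep
    return nr_conformers, rmsd, step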