Example 1
def _get_pg_thread(args):
    global data_ss

    threadobmol, terminating, PROCNAME = data_ss

    set_procname(PROCNAME + ".%d" % (os.getpid()))

    try:
        if not terminating.is_set():
            i, tolerance = args
            sym = maagbel.OBPointGroup()
            sym.Setup(threadobmol, i)
            pg = sym.IdentifyPointGroup(tolerance)
            del sym
    except KeyboardInterrupt:
        print(
            "Terminating worker process " + str(os.getpid()) + " prematurely.",
            file=sys.stderr,
        )
    return i, pg
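The worker above unpacks shared, read-only state from the module-level global data_ss, which a pool initializer is expected to populate before any task runs (the same initializer pattern used for the minima-search pool further down this listing). A minimal sketch of such an initializer and of driving the worker follows; the initializer name and the tolerances list are placeholders, not part of the package, and obmol and PROCNAME are assumed to exist:

from multiprocessing import Pool, Event

def _get_pg_parallel_init(obmol, terminating, procname):
    # store the shared state where _get_pg_thread expects to find it
    global data_ss
    data_ss = (obmol, terminating, procname)

# terminating = Event()
# pool = Pool(4, initializer=_get_pg_parallel_init,
#             initargs=(obmol, terminating, PROCNAME))
# results = pool.map(_get_pg_thread, [(i, tol) for i, tol in enumerate(tolerances)])
# pool.close()
# pool.join()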
Example 2
def scan_main(parser):
    """Main control function for the scanning procedure.

    Args:
        parser: (of class ManipulateAggregates.collection.read.SectionlessConfigParser)
            contains information about the config file. Defines the methods
            "get_str", "get_int", "get_float" and "get_boolean" to get the
            appropriate data type.
    """
    global grid
    set_procname(PROCNAME)
    gets = parser.get_str
    geti = parser.get_int
    getf = parser.get_float
    getb = parser.get_boolean
    do_calculate = not (getb("config_check"))
    # do some error checking
    # forcefield
    if gets("forcefield").lower() not in ["uff", "mmff94", "gaff", "ghemical"]:
        raise ValueError(
            'Wrong force field given. Only "uff", "mmff94", "gaff" and "ghemical" will be accepted.'
        )
    temp_ff = OBForceField.FindType(gets("forcefield").lower())
    if temp_ff is None:
        raise ValueError(
            "Somehow there was an error loading the forcefield %s (although it should be known to OpenBabel)."
            % (gets("forcefield").lower()))
    del temp_ff
    # boolean values
    for check_option in [
            "save_dx",
            "save_aligned",
            "save_noopt",
            "save_opt",
            "correct",
            "sp_opt",
            "sp_correct",
            "sp_remove",
            "globalopt",
            "prealign",
            "gzipped",
    ]:
        getb(check_option)
    # remaining float values
    for check_option in ["cutoff", "vdw_scale", "maxval"]:
        getf(check_option)
    # remaining integer values
    for check_option in [
            "columns", "optsteps", "progress", "hashwidth", "hashdepth"
    ]:
        geti(check_option)
    # check whether some options conflict
    if (gets("volumetric_data").startswith("from_scan,")
            and "minimasearch" in gets("jobtype").split(",")
            and not getb("save_dx")):
        print(
            "WARNING: a subsequent minimasearch tries to get its dx-files from this scan but",
            file=sys.stderr,
        )
        print(
            "         you requested not to save dx-files. This is probably an error (but not so if",
            file=sys.stderr,
        )
        print(
            "         you requested those dx-files to be used from a different directory) so please check.",
            file=sys.stderr,
        )

    # initialize directory name hashing
    init_hashing(geti("hashdepth"), geti("hashwidth"), gets("hashalg"))

    # value for progress reports
    if geti("progress") not in [0, 1, 2]:
        raise ValueError(
            'Wrong value for parameter "progress" given. Must be 0,1 or 2.')

    # populate all variables with the given values
    # read in the two molecules/aggregates from the given files
    mol1 = read_from_file(gets("geometry1"), ff=None)
    mol2 = read_from_file(gets("geometry2"), ff=None)

    # Compute radii of spheres that completely encompass both molecules to be able to
    # auto-adjust the gridsize
    mol1_vdw = mol1.get_vdw_radii()
    mol2_vdw = mol2.get_vdw_radii()
    mol1_coords = mol1.get_coordinates()
    mol2_coords = mol2.get_coordinates()
    mol1_center = numpy.mean(numpy.array(mol1_coords, dtype=float), axis=0)
    mol2_center = numpy.mean(numpy.array(mol2_coords, dtype=float), axis=0)
    # Will contain the radius of a sphere centered at the molecular center that
    # completely encompasses mol1
    maxdist1 = 0.0
    # Will contain the radius of a sphere centered at the molecular center that
    # completely encompasses mol2
    maxdist2 = 0.0
    distcutoff = getf("cutoff")
    vdwscale = getf("vdw_scale")
    for c1, vdw1 in zip(mol1_coords, mol1_vdw):
        npc1 = numpy.array(c1, dtype=float)
        if numpy.linalg.norm(npc1 - mol1_center) + (vdw1 *
                                                    vdwscale) > maxdist1:
            maxdist1 = numpy.linalg.norm(npc1 - mol1_center) + (vdw1 *
                                                                vdwscale)
    for c2, vdw2 in zip(mol2_coords, mol2_vdw):
        npc2 = numpy.array(c2, dtype=float)
        if numpy.linalg.norm(npc2 - mol2_center) + (vdw2 *
                                                    vdwscale) > maxdist2:
            maxdist2 = numpy.linalg.norm(npc2 - mol2_center) + (vdw2 *
                                                                vdwscale)
    # The radius of the sphere outside which no points need to be considered
    # with reduced precision and rounded up
    bigspherestr = "%.3f" % (maxdist1 + maxdist2 + distcutoff + 0.0005)
    bigsphererad = float(bigspherestr)

    # treat grid auto adjustments
    if gets("sp_gridtype") in ("full", "half"):
        # these are only the counts in one direction
        np_counts = numpy.array(list(map(int,
                                         gets("countsxyz").split(","))),
                                dtype=int)
        np_del = numpy.array(list(map(float,
                                      gets("distxyz").split(","))),
                             dtype=float)
        np_org = numpy.array([0, 0, 0], dtype=float)
        newdistxyz = numpy.array(
            list(
                map(
                    lambda f: float("%.3f" % (f)),
                    (bigsphererad + 0.0005) / (np_counts - 1),
                )),
            dtype=float,
        )
        newcountsxyz = (numpy.array(
            list(map(int, (bigsphererad + 0.0005) / np_del)), dtype=int) + 1)
        distsdiffer = numpy.linalg.norm(newdistxyz - np_del) > 0.0
        countsdiffer = numpy.linalg.norm(newcountsxyz - np_counts) > 0.0
        tmpstring = "..."
        if gets("sp_autoadjust") == "distxyz":
            if distsdiffer:
                tmpstring += "changing grid size in some directions by adjusting grid spacing 'distxyz' by: "
                tmpstring += "x: %+.3f, y: %+.3f, z: %+.3f" % (
                    tuple(newdistxyz - np_del))
                np_del = newdistxyz
            else:
                tmpstring += "grid dimensions need no adjustment"
        elif gets("sp_autoadjust") == "countsxyz":
            if countsdiffer:
                tmpstring += "changing grid size in some directions by adjusting number of points 'countsxyz' by: "
                tmpstring += "x: %+d, y: %+d, z: %+d" % (tuple(newcountsxyz -
                                                               np_counts))
                np_counts = newcountsxyz
            else:
                tmpstring += "grid dimensions need no adjustment"
        elif gets("sp_autoadjust") in ("", "none"):
            if countsdiffer or distsdiffer:
                tmpstring += "won't adjust, but grid dimensions inappropriate "
                tmpstring += "in some directions by (distxyz/countsxyz): "
                tmpstring += "x: %+.3f/%+d, y: %+.3f/%+d, z: %+.3f/%+d" % (
                    tuple(a for b in zip(newdistxyz - np_del, newcountsxyz -
                                         np_counts) for a in b))
            else:
                tmpstring += "grid dimensions need no adjustment"
        else:
            raise ValueError("Wrong value for config value sp_autoadjust.")
        tmpstring += "..."
        print(tmpstring)
        with open(gets("sp_gridsave"), "w") as gf:
            writeto(gf, "TYPE %s" % (gets("sp_gridtype")))
            if gets("sp_gridtype") == "half":
                writeto(gf, "%s" % (gets("halfspace")))
            writeto(gf, "\n")
            writeto(gf, "COUNTS %d,%d,%d\n" % tuple(np_counts))
            writeto(gf, "DIST %.3f,%.3f,%.3f\n" % tuple(np_del))
            writeto(gf, "ORG %.3f,%.3f,%.3f\n" % tuple(np_org))
    else:
        if not gets("sp_autoadjust") in ("", "none"):
            print(
                "WARNING: grid auto-adjustment not supported for current gridtype %s"
                % (gets("sp_gridtype")),
                file=sys.stderr,
            )

    # spatial grid: check gridtype and set-up grid
    if gets("sp_gridtype") == "full":
        if do_calculate:
            np_grid = general_grid(np_org, np_counts, np_counts, np_del)
            dx_dict = {
                "filename": gets("suffix"),
                "counts": list(2 * np_counts + 1),
                "org": list(np_grid[0]),
                "delx": [np_del[0], 0.0, 0.0],
                "dely": [0.0, np_del[1], 0.0],
                "delz": [0.0, 0.0, np_del[2]],
            }
            dx_dict["save_dx"] = getb("save_dx")
            dx_dict["gzipped"] = getb("gzipped")
        else:
            gets("suffix")
            getb("save_dx")
    elif gets("sp_gridtype") == "half":
        np_counts_pos = numpy.array([c for c in np_counts])
        np_counts_neg = numpy.array([c for c in np_counts])
        halfspace_vec = list(map(int, gets("halfspace").split(",")))
        for i in (0, 1, 2):
            if halfspace_vec[i] < 0:
                np_counts_pos[i] = abs(halfspace_vec[i])
            if halfspace_vec[i] > 0:
                np_counts_neg[i] = abs(halfspace_vec[i])
        if do_calculate:
            np_grid = general_grid(np_org, np_counts_pos, np_counts_neg,
                                   np_del)
            dx_dict = {
                "filename": gets("suffix"),
                "counts": list(np_counts_pos + np_counts_neg + 1),
                "org": list(np_grid[0]),
                "delx": [np_del[0], 0.0, 0.0],
                "dely": [0.0, np_del[1], 0.0],
                "delz": [0.0, 0.0, np_del[2]],
            }
            dx_dict["save_dx"] = getb("save_dx")
            dx_dict["gzipped"] = getb("gzipped")
        else:
            gets("suffix")
            getb("save_dx")
    else:
        raise ValueError("Wrong value for config value sp_gridtype.")
    # check whether this gives an error
    restarted = len(gets("scan_restartdirs")) > 0
    if restarted:
        olddirs = gets("scan_restartdirs").split(",")
        for d in olddirs:
            if not os.path.isdir(d):
                if do_calculate:
                    print(
                        "WARNING: directory supposed to contain dx files from previous runs %s does not exist. Skipping."
                        % (d),
                        file=sys.stderr,
                    )
                else:
                    raise ValueError(
                        "Directory supposed to contain dx files from previous runs %s does not exist."
                        % (d))
    # angular grid: check gridtype and set-up grid
    if gets("ang_gridtype") == "full":
        # these are the counts and distances for rotation
        countsposmain = numpy.array(list(map(int,
                                             gets("countspos").split(","))),
                                    dtype=int)
        countsnegmain = numpy.array(list(map(int,
                                             gets("countsneg").split(","))),
                                    dtype=int)
        distmain = numpy.array(list(map(float,
                                        gets("dist").split(","))),
                               dtype=float)
        if do_calculate:
            np_rot = general_grid(numpy.array([0.0, 0.0, 0.0]), countsposmain,
                                  countsnegmain, distmain)
    else:
        raise ValueError("Wrong value for config value ang_gridtype.")

    partition = tuple(map(int, gets("partition").split("/")))
    if len(partition) != 2:
        raise ValueError(
            "Format for 'partition' must be I1/I2 with I1 and I2 positive integers and I1<=I2"
        )
    if partition[0] > partition[1] or partition[0] < 1 or partition[1] < 1:
        raise ValueError(
            "Format for 'partition' must be I1/I2 with I1 and I2 positive integers and I1<=I2"
        )

    # Create a mask for points whose energy never has to be evaluated
    # A "False" associated with a point means "do not evaluate its energy".
    try:
        numpy.sqrt(
            numpy.einsum(
                "ij,ij->i",
                numpy.array([[1.0, 1.0, 0.0]]),
                numpy.array([[1.0, 1.0, 0.0]]),
            ))
        numpy.linalg.norm(numpy.array([[1.0, 1.0, 0.0]]), axis=1)
    except AttributeError:
        normaxisone = lambda array: numpy.apply_along_axis(
            numpy.linalg.norm, 1, array)
    except TypeError:
        normaxisone = lambda array: numpy.sqrt(
            numpy.einsum("ij,ij->i", array, array))
    else:
        normaxisone = lambda array: numpy.linalg.norm(array, axis=1)
    mask = numpy.ones((len(np_grid), ), dtype=bool)
    dist = numpy.zeros((len(np_grid), ), dtype=float)
    origin = numpy.array([0.0, 0.0, 0.0], dtype=float)
    min1_vdw = min(mol1_vdw)
    min2_vdw = min(mol2_vdw)
    for c1, vdw1 in zip(mol1_coords, mol1_vdw):
        npc1 = numpy.array(c1, dtype=float)
        vdw = (vdw1 + min2_vdw) * vdwscale
        dist = normaxisone(np_grid - (npc1 - mol1_center))
        # dist = numpy.linalg.norm((np_grid-(npc1-mol1_center)),axis=1)
        mask[dist < vdw] = False
    inmasked = numpy.sum(mask == False)
    dist = normaxisone(np_grid - origin)
    # dist      = numpy.linalg.norm((np_grid-origin),axis=1)
    mask[dist > bigsphererad] = False
    outmasked = numpy.sum(mask == False) - inmasked
    print(
        "...computed mask, reduction in points: %.2f%%, inside: %.2f%%, outside: %.2f%%..."
        % (
            100.0 * (inmasked + outmasked) / len(mask),
            100.0 * inmasked / len(mask),
            100.0 * outmasked / len(mask),
        ))

    if not do_calculate:
        return

    # align the two molecules and append one to the other
    # after this, mol1 and mol2 can no longer be used
    obmol = prepare_molecules(
        mol1,
        mol2,
        gets("aligned_suffix"),
        save_aligned=getb("save_aligned"),
        align=getb("prealign"),
    )

    # convert the grid to C data types
    grid = double_dist(np_grid)

    if restarted:
        print("This is a restarted run (old files are in: %s)" %
              (gets("scan_restartdirs")))
        olddxfiles = get_old_dxfiles(
            gets("scan_restartdirs").split(","), gets("suffix"))
        print("Number of already existing dx files: %d" % (len(olddxfiles)))
    else:
        olddxfiles = {}

    # For every angle, scan the entire spatial grid and save
    # each optimum geometry if desired
    # Will also return a structure making it easy to find the optimum
    # for every spatial point
    transrot_result = _transrot_en(
        obmol,
        gets("forcefield").lower(),
        grid,
        np_rot,
        getf("maxval"),
        dx_dict,
        getb("correct"),
        getf("cutoff"),
        getf("vdw_scale"),
        report=geti("progress"),
        reportmax=len(np_rot),
        save_noopt=getb("save_noopt"),
        save_opt=getb("save_opt"),
        optsteps=geti("optsteps"),
        olddxfiles=olddxfiles,
        partition=partition,
        mask=mask,
    )

    del grid  # the grid in C data types is no longer needed since the scan has already been performed

    # Evaluate transrot_result to find the angular optimum for every
    # spatial grid point, if so desired
    if getb("sp_opt"):

        dx_dict["filename"] = gets("sp_opt_dx")
        dx_dict["save_dx"] = getb("sp_opt")

        _sp_opt(
            gets("sp_opt_dx"),
            gets("sp_opt_xyz"),
            gets("sp_opt_ang"),  # filenames
            dx_dict,  # data about the dx-file (header and how to save it)
            getb("sp_correct"),
            getb("sp_remove"),
            getf("maxval"),  # data concerning postprocessing of energy data
            getb("globalopt"),  # is the global optimum desired?
            obmol,
            np_grid,  # data needed to print out xyz-files at the optimum geometries
            transrot_result,  # see above
        )
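For the grid auto-adjustment and the energy mask, scan_main needs the radius of a sphere around each molecule's geometric center that encloses all scaled van der Waals spheres, and it later flags grid points that lie inside molecule 1 or outside the combined sphere so their energies are never evaluated. A condensed, standalone sketch of that geometry with made-up coordinates and radii (the real code additionally adds the smallest vdW radius of the second molecule and the distance cutoff):

import numpy

coords = numpy.array([[0.0, 0.0, 0.0], [1.5, 0.0, 0.0]])  # toy molecule
vdw = numpy.array([1.2, 1.7])                              # toy vdW radii
vdwscale = 1.0
center = coords.mean(axis=0)
# radius of the sphere centered at 'center' enclosing all scaled vdW spheres
maxdist = max(numpy.linalg.norm(c - center) + r * vdwscale
              for c, r in zip(coords, vdw))

grid = numpy.mgrid[-3:4, -3:4, -3:4].reshape(3, -1).T.astype(float)  # toy grid
mask = numpy.ones(len(grid), dtype=bool)
for c, r in zip(coords, vdw):
    # too close to an atom of molecule 1: never evaluate
    dist = numpy.linalg.norm(grid - (c - center), axis=1)
    mask[dist < r * vdwscale] = False
# too far away from the origin: never evaluate either
mask[numpy.linalg.norm(grid, axis=1) > maxdist] = False
print("kept %d of %d grid points" % (mask.sum(), len(mask)))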
Example 3
def _transrot_en_process(args):
    """Each worker process executes this function.

    Args:
        args: (list) arguments to be passed to the worker processes (via
            pickling)
    """
    global data_s

    defaultobmol, transgrid, terminating, PROCNAME, mask = data_s

    if mask is not None:
        notmasked = lambda i: mask[i]
    else:
        notmasked = lambda i: True

    set_procname(PROCNAME + ".%d" % (os.getpid()))

    try:
        if not terminating.is_set():

            (
                a1,
                a2,
                a3,
                ffname,
                report,
                maxval,
                dx_dict,
                correct,
                savetemplate,
                templateprefix,
                anglecount,
                count,
                save_noopt,
                save_opt,
                optsteps,
                cutoff,
                vdw_scale,
                oldfile,
            ) = args

            angle_string = str(a1) + "," + str(a2) + "," + str(a3)
            angle_comment = "angles=(" + angle_string + ")"

            if oldfile is not None:
                compute = False
                try:
                    old = read_dx(
                        oldfile,
                        grid=False,
                        data=True,
                        silent=True,
                        comments=True,
                        gzipped=dx_dict["gzipped"],
                    )
                except ValueError as e:
                    print(
                        "Error when reading in old dx-file %s, recomputing. Error was:"
                        % (oldfile),
                        e,
                        file=sys.stderr,
                    )
                    compute = True
                if not compute:
                    old_a1, old_a2, old_a3 = list(
                        map(float,
                            re.split(r",|\(|\)", old["comments"][0])[1:4]))
                    if not (a1, a2, a3) == (old_a1, old_a2, old_a3):
                        print(
                            "WARNING: old dx-file %s treated %s with index %d. This is also my index but I treat %s. Recomputing."
                            % (oldfile, old["comments"][0], anglecount,
                               angle_comment),
                            file=sys.stderr,
                        )
                        compute = True
                    else:
                        energies = old["data"].tolist()
                        del old
                if not compute:
                    if not len(transgrid) == len(energies):
                        print(
                            "WARNING: old dx-file %s contains %d entries but the spatial grid is supposed to have %d entries. Recomputing."
                            % (oldfile, len(energies), len(transgrid)),
                            file=sys.stderr,
                        )
                        compute = True
            else:
                compute = True

            if compute or savetemplate:
                obmol = OBAggregate(defaultobmol)
                obff = OBForceField.FindForceField(ffname)
                rotfunc = obmol.RotatePart
                rotfunc(0, 1, a1)
                rotfunc(0, 2, a2)
                rotfunc(0, 3, a3)

            if compute:
                energies = _trans_en(
                    obmol,
                    obff,
                    transgrid,
                    maxval * 1.2,
                    cutoff,
                    vdw_scale,
                    report=report,
                    notmasked=notmasked,
                )

                if correct or dx_dict["save_dx"]:
                    # create a copy which can then be changed and possibly saved
                    tempenergies = copy.copy(energies)

                if correct:
                    try:
                        actualmax = max(
                            (e for e in tempenergies if not e >= maxval))
                    except ValueError:
                        actualmax = maxval
                    tempenergies = [
                        actualmax if e >= maxval else e for e in tempenergies
                    ]

                if dx_dict["save_dx"]:
                    print_dx_file(
                        str(anglecount) + "_",
                        True,
                        dx_dict,
                        tempenergies,
                        angle_comment,
                    )

                if correct or dx_dict["save_dx"]:
                    del tempenergies

            if savetemplate:
                minindex = energies.index(min(energies))
                template = grid[minindex][0]

                obmol.TranslatePart(0, template)
                if obmol.IsGoodVDW(vdw_scale):
                    if save_noopt:
                        filename = (templateprefix + str(anglecount) + "_" +
                                    angle_string + ".xyz")
                        pybel.Molecule(obmol).write("xyz",
                                                    filename,
                                                    overwrite=True)
                    if save_opt:
                        filename = (templateprefix + "opt_" + str(anglecount) +
                                    "_" + angle_string + ".xyz")
                        p_tempmol = pybel.Molecule(obmol)
                        p_tempmol.localopt(forcefield=ffname, steps=optsteps)
                        p_tempmol.write("xyz", filename, overwrite=True)

            # returning the molecule to its original state is not necessary since every worker process
            # creates its own instance and leaves the original one as is

    except KeyboardInterrupt:
        print(
            "Terminating worker process " + str(os.getpid()) + " prematurely.",
            file=sys.stderr,
        )

    return anglecount, (a1, a2, a3), energies, minindex
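When a dx-file from a previous run is reused, the worker recovers the three rotation angles from the file's first comment line, written as "angles=(a1,a2,a3)", and only accepts the file if they match its own task. A small standalone round trip of that parsing step:

import re

a1, a2, a3 = 30.0, 60.0, 90.0
angle_comment = "angles=(" + ",".join(map(str, (a1, a2, a3))) + ")"
# the same split that _transrot_en_process applies to old["comments"][0]
old_a1, old_a2, old_a3 = map(float, re.split(r",|\(|\)", angle_comment)[1:4])
assert (old_a1, old_a2, old_a3) == (a1, a2, a3)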
Example 4
the hashes of their names. Usage:

hashsort.py DIRECTORY [REGEX]

The mandatory first argument DIRECTORY must be a directory that already exists.
All files in this directory whose names match the regular expression provided
as the second, optional argument (default: '^[1-9][0-9]*_out.dx$') will be sorted
into subdirectories. This will use the MD5 hashing algorithm.
"""

# default process name
PROCNAME = "HashSort"
try:
    from FireDeamon import set_procname

    set_procname(PROCNAME)
except ImportError:
    set_procname = lambda s: None

# default regular expression
REGEX = "^[1-9][0-9]*_out.dx$"


def entrypoint():
    for arg in sys.argv:
        if arg == "--help" or arg == "-h":
            print(HELPTEXT)
            exit(0)
    if len(sys.argv) < 2:
        raise ValueError("Not enough arguments given.")
    dir = sys.argv[1]
def _main(input_file):
    # default config
    config = DEFAULT_CONFIG
    options = [o for o in config] + list(
        set([mo for mopts in MANDATORY_OPTIONS.values() for mo in mopts]))
    parser = read_config_file(input_file, defaults=config, nocase=True)
    jobtype_list = parser.get_str("jobtype")
    # jobtypes have long and short names but both shall be treated the same so the
    # following is a mapping of the long and short forms to a unified form
    jobtype_dict = {
        "scan": "scan",
        "s": "scan",
        "minimasearch": "minima search",
        "ms": "minima search",
        "similarityscreening": "similarity screening",
        "ss": "similarity screening",
    }
    functions_dict = {
        "scan": scan_main,
        "minima search": minimasearch_main,
        "similarity screening": similarityscreening_main,
    }
    # Jobs have to be performed in a certain order to make sense. The order is given
    # in the following dictionary (starting at 0 and increasing):
    order_dict = {"scan": 0, "minima search": 1, "similarity screening": 2}
    try:
        # sort the jobtypes to be performed by an increasing order parameter
        jobtype_list = sorted(
            # since each job must not be performed multiple times, use a set to get the
            # irreducible set of jobtypes
            set(
                # create a list of tuples each consisting of the desired jobtype name
                # and its order parameter
                [(jobtype_dict[e.lower()], order_dict[jobtype_dict[e.lower()]])
                 for e in jobtype_list.split(",")]),
            key=operator.itemgetter(1),
        )
    except KeyError as e:
        raise ValueError(
            "Given short or long form does not match any known jobtype: %s" %
            (e))
    # check whether all mandatory options are present
    missing_options = []
    for jobtype, discard in jobtype_list:
        for opt in MANDATORY_OPTIONS[jobtype]:
            try:
                parser.get_str(opt)
            except NoOptionInConfigFileError:
                missing_options.append(opt)
    if len(missing_options) > 0:
        print(
            "ERROR: could not find the following mandatory options in the config file:",
            file=sys.stderr,
        )
        for o in missing_options:
            print(o, file=sys.stderr)
        raise NoOptionInConfigFileError("Incomplete input.")
    del missing_options
    unknown_options = parser.check_against(options)
    if len(unknown_options) > 0:
        print(
            "WARNING: the following are unknown lines in the config file:",
            file=sys.stderr,
        )
        for o in unknown_options:
            print(o, file=sys.stderr)
        print(file=sys.stderr)
    del unknown_options
    if parser.get_boolean("config_check"):
        print("This is a check of the config file.")
    for jobtype, __ in jobtype_list:
        jobtype_main = functions_dict[jobtype]
        print("Running %s..." % (jobtype))
        jobtype_main(parser)
        set_procname(PROCNAME)
        print("...finished %s\n" % (jobtype))
    if parser.get_boolean("config_check"):
        print("Config file seems fine.")
def _minimasearch_process(args):
    """Each worker process executes this function.

    Args:
        args: (list) arguments to be passed to the worker processes (via
            pickling)
    """
    global data_ms

    c_neighbour_list, terminating, PROCNAME = data_ms

    set_procname(PROCNAME + ".%d" % (os.getpid()))

    try:
        if not terminating.is_set():
            (
                single_file,
                degeneration,
                nr_neighbours,
                progress,
                upper_cutoff,
                lower_cutoff,
                depths_sort,
                gzipped,
            ) = args
            try:
                temp = read_dx(
                    single_file,
                    grid=False,
                    data=True,
                    silent=True,
                    comments=True,
                    gzipped=gzipped,
                )
            except ValueError as e:
                print(
                    "Error when reading in dx-file %s, skipping. Error was:" %
                    (single_file),
                    e,
                    file=sys.stderr,
                )
                return None

            a1, a2, a3 = list(
                map(float,
                    re.split(r",|\(|\)", temp["comments"][0])[1:4]))
            tempvalues = temp["data"]

            depths = []
            minima = LocalMinimaPy(
                c_neighbour_list,
                tempvalues,
                degeneration,
                nr_neighbours,
                prog_report=(progress == 1),
                upper_cutoff=upper_cutoff,
                lower_cutoff=lower_cutoff,
                sort_it=depths_sort,
                depths=depths,
            )
    except KeyboardInterrupt:
        print(
            "Terminating worker process " + str(os.getpid()) + " prematurely.",
            file=sys.stderr,
        )

    if depths_sort == 0:
        depths = [0.0] * len(minima)
    return minima, depths, [tempvalues[m] for m in minima], (a1, a2, a3)
def minimasearch_main(parser):
    """Main control function for the minima search procedure.

    Args:
        parser: (of class ManipulateAggregates.collection.read.SectionlessConfigParser)
            contains information about the config file. Defines the methods
            "get_str", "get_int", "get_float" and "get_boolean" to get the
            appropriate data type.
    """
    set_procname(PROCNAME)
    gets = parser.get_str
    geti = parser.get_int
    getf = parser.get_float
    getb = parser.get_boolean
    do_calculate = not (getb("config_check"))
    # do some error checking
    # value for progress reports
    if geti("progress") not in [0, 1, 2]:
        raise ValueError(
            'Wrong value for parameter "progress" given. Must be 0,1 or 2.')
    else:
        progress = geti("progress")
    # check whether partitioning over nodes was switched on
    if not gets("partition") == "1/1":
        raise ValueError(
            "Parallelization unequal 1/1 not suported for minima search.")
    # boolean values
    # NONE YET PRESENT FOR THIS JOBTYPE
    # string values
    if not gets("neighbour_check_type") in [
            "eukledian",
            "manhattan_single",
            "manhattan_multiple",
    ]:
        raise ValueError(
            "Option neighbour_check must be 'eukledian', 'manhattan_single' or 'manhattan_multiple'."
        )
    # float values (or lists of floats)
    cutoff_scale = getf("cutoff_scale")

    init_hashing(geti("hashdepth"), geti("hashwidth"), gets("hashalg"))

    for check_option in ["degeneration", "maxval", "depths_sort"]:
        getf(check_option)
    # check whether some options conflict
    # NO CONFLICTS KNOWN YET

    # spatial grid: check gridtype and set-up grid
    # read in parameters that are required in any case for the appropriate gridtypes
    if gets("sp_gridtype") in ("full", "half"):
        np_counts = numpy.array(list(map(int,
                                         gets("countsxyz").split(","))),
                                dtype=int)
        np_del = numpy.array(list(map(float,
                                      gets("distxyz").split(","))),
                             dtype=float)
        np_org = numpy.array([0, 0, 0], dtype=float)
    # treat auto-adjustment
    if not gets("sp_autoadjust") in ("", "none"):
        gfdict = {"TYPE": "%s" % (gets("sp_gridtype"))}
        # For each type of grid, define which parameters are allowed to be auto-adjusted
        # and how many can be adjusted at the same time.
        # Also, generate string representations of what you expect to find in the
        # gridfile in order to find out which parameters deviate.
        if gets("sp_gridtype") in ("full", "half"):
            gfdict["COUNTS"] = "%d,%d,%d" % tuple(np_counts)
            gfdict["DIST"] = "%.3f,%.3f,%.3f" % tuple(np_del)
            gfdict["ORG"] = "%.3f,%.3f,%.3f" % tuple(np_org)
            gfallowed = ("COUNTS", "DIST")
            gfmaxadjust = 1
        else:
            raise ValueError(
                "WARNING: grid auto-adjustment not supported for current gridtype %s"
                % (gets("sp_gridtype")))
        if gets("sp_gridtype") == "half":
            gfdict["TYPE"] += gets("halfspace")
        # Check whether file containing information about auto-adjustment exists and
        # whether too many deviations are found.
        try:
            with open(gets("sp_gridsave"), "r") as gf:
                deviations = 0
                devstring = ""
                for line in gf:
                    l = line.rstrip().split()
                    if len(l) != 2:
                        raise ValueError(
                            "Gridtype file %s must contain exactly 2 columns per line."
                            % (gets("sp_gridsave")))
                    if l[0] in gfallowed:
                        if gfdict[l[0]] != l[1]:
                            deviations += 1
                            devstring += l[0] + " "
                            gfdict[l[0]] = l[1]
                    else:
                        if gfdict[l[0]] != l[1]:
                            raise ValueError(
                                "Mandatory parameter %s is not equal for grid defined in config file and grid defined in grid file %s."
                                % (l[0], gets("sp_gridsave")))
                if deviations > gfmaxadjust:
                    raise ValueError(
                        "The grid used for the scan (see file %s) deviates from the one defined "
                        % (gets("sp_gridsave")) +
                        "in the config file by more than %d allowed parameter(s), namely %s. It should be:"
                        % (gfmaxadjust, devstring),
                        gfdict,
                    )
                elif deviations == 0:
                    print(
                        "...successfully read in grid used for the scan (was not auto-adjusted)..."
                    )
                else:
                    print(
                        "...successfully read in grid used for the scan (auto-adjusted in %d parameters: %s)..."
                        % (deviations, devstring))
        except (IOError, OSError) as e:
            raise IOError(
                "Auto-adjustment of spatial grid requested but file containing grid information %s could not be opened."
                % (gets("sp_gridsave")),
                e,
            )
        # For each gridtype, use the deviating values.
        if gets("sp_gridtype") in ("full", "half"):
            for ds in devstring.split():
                if ds == "COUNTS":
                    np_counts = numpy.array(list(
                        map(int, gfdict[ds].split(","))),
                                            dtype=int)
                elif ds == "DIST":
                    np_del = numpy.array(list(map(float,
                                                  gfdict[ds].split(","))),
                                         dtype=float)
    if gets("sp_gridtype") == "full":
        gets("suffix")
    elif gets("sp_gridtype") == "half":
        np_counts_pos = numpy.array([c for c in np_counts])
        np_counts_neg = numpy.array([c for c in np_counts])
        halfspace_vec = list(map(int, gets("halfspace").split(",")))
        for i in (0, 1, 2):
            if halfspace_vec[i] < 0:
                np_counts_pos[i] = abs(halfspace_vec[i])
            if halfspace_vec[i] > 0:
                np_counts_neg[i] = abs(halfspace_vec[i])
        gets("suffix")
    else:
        raise ValueError("Wrong value for config value sp_gridtype.")

    if gets("distance_cutoff") == "auto":
        if (gets("neighbour_check_type") == "eukledian"
                or gets("neighbour_check_type") == "manhattan_single"):
            raise ValueError(
                "Value 'auto' for option 'distance_cutoff' only supported for 'manhattan_multiple' and grids 'full' or 'half'."
            )
        elif gets("neighbour_check_type") == "manhattan_multiple":
            if gets("sp_gridtype") in ("full", "half"):
                distance_cutoff = list(cutoff_scale * np_del)
            else:
                raise ValueError(
                    "Value 'auto' for option 'distance_cutoff' only supported for grids 'full' or 'half'."
                )
        else:
            raise Exception("Wrong value for option 'neighbour_check_type'.")
    else:
        if (gets("neighbour_check_type") == "eukledian"
                or gets("neighbour_check_type") == "manhattan_single"):
            distance_cutoff = getf("distance_cutoff")
        elif gets("neighbour_check_type") == "manhattan_multiple":
            try:
                distance_cutoff = list(
                    map(
                        lambda s: cutoff_scale * float(s),
                        gets("distance_cutoff").split(","),
                    ))
            except ValueError:
                raise TypeError(
                    "Each element of option distance_cutoff must be of type float."
                )
            if len(distance_cutoff) != 3:
                raise ValueError(
                    "Option 'distance_cutoff' must have three entries for 'manhattan_multiple'."
                )
        else:
            raise Exception("Wrong value for option 'neighbour_check_type'.")

    # get number of neighbours to search
    nr_shells = None
    if gets("nr_neighbours") == "auto":
        if gets("neighbour_check_type") == "manhattan_multiple" and gets(
                "sp_gridtype") in ("full", "half"):
            nr_neighbours = [
                2 * int(1.0 * distance_cutoff[i] / np_del[i]) + 1
                for i in range(3)
            ]
            tmp = nr_neighbours[0]
            if all((i == tmp for i in nr_neighbours)):
                nr_shells = int((tmp - 1) // 2)
            nr_neighbours = nr_neighbours[0] * nr_neighbours[
                1] * nr_neighbours[2] - 1
        else:
            raise ValueError(
                "Value 'auto' for 'nr_neighbours' only supported for 'manhattan_multiple' and the sp_gridtypes 'full' and 'half'."
            )
    else:
        nr_neighbours = geti("nr_neighbours")
    if gets("max_nr_neighbours") == "auto":
        max_nr_neighbours = nr_neighbours
    else:
        max_nr_neighbours = geti("max_nr_neighbours")

    if gets("volumetric_data").startswith("from_scan,"):
        config_data = gets("volumetric_data").split(",")
        if len(config_data) == 2:
            # angular grid: check gridtype and set-up grid
            if gets("ang_gridtype") == "full":
                # these are the counts and distances for rotation
                countsposmain = numpy.array(list(
                    map(int,
                        gets("countspos").split(","))),
                                            dtype=int)
                countsnegmain = numpy.array(list(
                    map(int,
                        gets("countsneg").split(","))),
                                            dtype=int)
                totcounts = countsposmain + countsnegmain + 1
                nr_dx_files = 1
                for c in totcounts:
                    nr_dx_files *= c
            else:
                raise ValueError(
                    "Option 'volumetric_data' of 'from_scan' only supported for 'ang_gridtype'=='full'"
                )
            filenames = []
            reuse_ids = {}
            for f in range(1, nr_dx_files + 1):
                reuse_ids[f] = True
            # reuse_ids = {f:True for f in range(1,nr_dx_files+1)}
            if len(gets("scan_restartdirs")) > 0:
                print(
                    "...checking which dx-files are present in old directories..."
                )
                olddxfiles = get_old_dxfiles(
                    gets("scan_restartdirs").split(","), gets("suffix"))
                for c in olddxfiles:
                    reuse_ids[c] = False
                # reuse_ids.update({c:False for c in olddxfiles})
                filenames += [olddxfiles[c] for c in olddxfiles]
            discard, directory = config_data
            if os.path.isdir(directory):
                print(
                    "...checking which dx-files are present in directory created by a previous scan..."
                )
                filenames += [
                    hashIO.hashpath(directory + os.sep + str(f) + "_" +
                                    gets("suffix")) for f in reuse_ids
                    if reuse_ids[f]
                ]
            else:
                print(
                    "WARNING: directory %s that should contain old dx-files does not exist."
                    % (directory),
                    file=sys.stderr,
                )
            print("...determining which files are missing...")
            dx_files = sorted(
                [
                    f for f in filenames
                    if os.path.exists(f) and os.stat(f).st_size > 0
                ],
                key=str.lower,
            )
            missing_dx_files = sorted(
                [
                    f.split(os.sep)[-1] for f in filenames
                    if not os.path.exists(f) or os.stat(f).st_size <= 0
                ],
                key=str.lower,
            )
            if len(dx_files) != nr_dx_files:
                print(
                    "WARNING: some files that were expected to be generated by the scan in directory '%s' are missing"
                    % (directory),
                    file=sys.stderr,
                )
                if len(gets("scan_restartdirs")) > 0:
                    print(
                        "         and could not be supplied from previous runs in the directories: %s"
                        % (gets("scan_restartdirs")),
                        file=sys.stderr,
                    )
                print(
                    "Missing files: " + ", ".join(
                        sorted(
                            missing_dx_files,
                            key=lambda e:
                            # e.split(os.sep)[-1] is the name of the file (NAME)
                            # NAME.split("_"+gets("suffix"))[0] is the numer of the dx-file
                            int(
                                e.split(os.sep)[-1].split("_" + gets("suffix"))
                                [0]),
                        )),
                    file=sys.stderr,
                )
        else:
            raise ValueError(
                'Wrong format for parameter "volumetric_data" starting with "from_scan" given. Must be "from_scan,DIR".'
            )
    elif gets("volumetric_data").startswith("dir_regex,"):
        config_data = gets("volumetric_data").split(",")
        if len(config_data) == 3:
            discard, directory, regex = config_data
            if os.path.isdir(directory):
                dx_files = [
                    f for f in hashIO.listfiles(
                        directory, regex, nullsize=False, nulldepth=False)
                ]
            else:
                raise ValueError("Given directory " + directory +
                                 " is not a directory.")
        else:
            raise ValueError(
                'Wrong format for parameter "volumetric_data" starting with "dir_regex" given. Must be "dir_regex,DIR,REGEX".'
            )
    else:
        raise ValueError(
            'Wrong value for parameter "volumetric_data" given. Must be "from_scan,DIR" or "dir_regex,DIR,REGEX".'
        )

    use_regular = ((nr_shells is not None)
                   and (gets("sp_gridtype") in ("full", "half"))
                   and (gets("neighbour_check_type") == "manhattan_multiple")
                   and (gets("nr_neighbours") == "auto"))

    if use_regular:
        if gets("max_nr_neighbours") != "auto":
            print(
                "WARNING: the value for 'max_nr_neighbours' is not used for regular grids.",
                file=sys.stderr,
            )
        print(
            "...using fast neighbour-search algorithm for regular grid with %d neighbour shells..."
            % (nr_shells))
    else:
        print("...using slow neighbour-search algorithm for irregular grid...")
        print("Conditions not fulfilled for fast algorithm:")
        regular_dict = {
            "sp_gridtype in ('full','half')":
            gets("sp_gridtype") in ("full", "half"),
            "neighbour_check_type == manhattan_multiple":
            gets("neighbour_check_type") == "manhattan_multiple",
            "nr_neighbours == auto":
            gets("nr_neighbours") == "auto",
        }
        tmpstring = "int(distance_cutoff[i]/distxyz[i]) not the same for i in {0,1,2}"
        if (gets("sp_gridtype") in ("full", "half")
                and gets("neighbour_check_type") == "manhattan_multiple"
                and gets("nr_neighbours") == "auto"):
            regular_dict[tmpstring] = nr_shells is not None
        else:
            regular_dict[tmpstring] = True
        for reason in regular_dict:
            if not regular_dict[reason]:
                print("     %s" % (reason))
    geti("pool_chunksize")

    if not do_calculate:
        if len(dx_files) == 0:
            print(
                "WARNING: some of the dx-files you requested are currently not present, which might.",
                file=sys.stderr,
            )
            print(
                "         not be a problem since this is only a config check.",
                file=sys.stderr,
            )
        return

    if len(dx_files) == 0:
        raise RuntimeError(
            "Could not find any non-empty dx-files matching the given criteria."
        )

    # sorting the dx-files by name to always get a sorted minima file
    dx_files = sorted(
        dx_files,
        key=lambda e:
        # e.split(os.sep)[-1] is the name of the file (NAME)
        # NAME.split("_"+gets("suffix"))[0] is the numer of the dx-file
        int(e.split(os.sep)[-1].split("_" + gets("suffix"))[0]),
    )
    print("...sorted list of dx-files...")

    if gets("sp_gridtype") == "full":
        np_grid = general_grid(np_org, np_counts, np_counts, np_del)
    elif gets("sp_gridtype") == "half":
        np_grid = general_grid(np_org, np_counts_pos, np_counts_neg, np_del)
    else:
        raise ValueError("Wrong value for config value sp_gridtype.")

    pos_from_index = lambda index: np_grid[index]

    if use_regular:
        if gets("sp_gridtype") == "full":
            FD_nr_points = list(map(lambda c: 2 * c + 1, np_counts))
        elif gets("sp_gridtype") == "half":
            FD_nr_points = list(np_counts_pos + np_counts_neg + 1)
        else:
            raise ValueError("Wrong value for config value sp_gridtype.")
        c_neighbour_list = RegularNeighbourListPy(FD_nr_points,
                                                  int(nr_shells),
                                                  prog_report=False,
                                                  exclude_border=True)
    else:
        c_neighbour_list = IrregularNeighbourListPy(
            np_grid,
            nr_neighbours,
            distance_cutoff,
            max_nr_neighbours=max_nr_neighbours,
            prog_report=(progress == 1),
            cutoff_type=gets("neighbour_check_type"),
            sort_it=False,
        )
    print("...generated neighbour list...")

    try:
        nr_threads = int(os.environ["OMP_NUM_THREADS"])
    except KeyError:
        nr_threads = 1
    except ValueError:
        nr_threads = 1

    # how to properly handle keyboard interrupts when multi processing has been taken from:
    # http://stackoverflow.com/questions/14579474/multiprocessing-pool-spawning-new-childern-after-terminate-on-linux-python2-7
    terminating = Event()

    # global data_ms                            #DEBUG
    # data_ms = (c_neighbour_list, terminating) #DEBUG

    args = [[
        single_file,
        getf("degeneration"),
        nr_neighbours,
        progress,
        getf("maxval"),
        None,
        geti("depths_sort"),
        getb("gzipped"),
    ] for single_file in dx_files]

    if not gets("minima_file_save").endswith(".gz"):
        minima_file = io.open(gets("minima_file_save"), "wb")
    else:
        try:
            from subprocess import Popen, PIPE

            gzipprocess = Popen(
                ["gzip", "-6", "-c", "-"],
                stdin=PIPE,
                stdout=io.open(gets("minima_file_save"), "wb"),
                bufsize=4096,
            )
            minima_file = gzipprocess.stdin
        except ImportError:
            print(
                "WARNING: cannot import gzip module, will treat %s as a non-gzipped one."
                % (gets("minima_file_save")[0:-3]),
                file=sys.stderr,
            )
            minima_file = io.open(gets("minima_file_save")[0:-3], "wb")
        except OSError:
            print(
                "WARNING: cannot import gzip module, will treat %s as a non-gzipped one."
                % (filename),
                file=sys.stderr,
            )
            minima_file = io.open(gets("minima_file_save")[0:-3], "wb")

    minima_file.write(
        hashstring("#%s FF: %s\n" %
                   (gets("minima_file_save"), gets("forcefield"))))

    dx_file_count = 0
    dx_file_max = len(args)

    try:
        chunksize = geti("pool_chunksize")
        while dx_file_count < dx_file_max:
            pool = Pool(
                nr_threads,
                initializer=_minimasearch_parallel_init,
                initargs=(c_neighbour_list, terminating, PROCNAME),
            )  # NODEBUG
            chunkstart = dx_file_count
            if chunkstart + chunksize > dx_file_max:
                chunkend = dx_file_max
            else:
                chunkend = chunkstart + chunksize
            # loop over all dx-files via worker processes
            for temp in pool.imap(_minimasearch_process,
                                  args[chunkstart:chunkend]):  # NODEBUG
                # for arg in args:                                            #DEBUG
                dx_file_count += 1
                # temp = _minimasearch_process(arg)                           #DEBUG
                if temp is None:
                    if progress > 0:
                        print("Skipping dx-file %d of %d: read error" %
                              (dx_file_count, dx_file_max))
                    continue
                minima, depths, min_energies, (a1, a2, a3) = temp
                tmplen = list(map(len, (minima, depths, min_energies)))
                if min(tmplen) <= 0:
                    if progress > 0:
                        print("Skipping dx-file %d of %d: no minima found" %
                              (dx_file_count, dx_file_max))
                    continue
                if not (min(tmplen) == max(tmplen)):
                    if progress > 0:
                        print(
                            "Error while processing dx-file %d of %d" %
                            (dx_file_count, dx_file_max),
                            file=sys.stderr,
                        )
                    raise RuntimeError(
                        "Error while processing dx-file %d of %d: lists do not have equal lengths"
                        % (dx_file_count, dx_file_max))
                if progress > 0:
                    print("Processing dx-file %d of %d: %d minima" %
                          (dx_file_count, dx_file_max, len(minima)))

                for minimum, depth, min_energy in zip(minima, depths,
                                                      min_energies):
                    min_pos = pos_from_index(minimum)
                    minima_file.write(
                        hashstring(
                            "%10d     %15.8f %15.8f %15.8f     %15.8f %15.8f %15.8f     %15.8E   %E \n"
                            % (
                                minimum,
                                min_pos[0],
                                min_pos[1],
                                min_pos[2],
                                a1,
                                a2,
                                a3,
                                min_energy,
                                depth,
                            )))
                del (
                    minimum,
                    depth,
                    min_energy,
                    minima,
                    depths,
                    min_energies,
                    a1,
                    a2,
                    a3,
                    temp,
                    min_pos,
                )
            pool.close()  # NODEBUG
            pool.join()  # NODEBUG
    except KeyboardInterrupt as e:
        print("Caught keyboard interrupt.", file=sys.stderr)
        pool.terminate()  # NODEBUG
        pool.join()  # NODEBUG
        print("Terminating main routine prematurely.", file=sys.stderr)
        minima_file.close()
        raise e
    minima_file.close()
    if gets("minima_file_save").endswith(".gz"):
        gzipprocess.wait()
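With nr_neighbours set to "auto" on a regular 'full' or 'half' grid with 'manhattan_multiple' checking, minimasearch_main counts per axis how many grid points fall within the Manhattan cutoff (2*int(cutoff/spacing)+1), multiplies the three counts and subtracts the central point itself; only if the per-axis counts agree can the fast shell-based neighbour search be used. A small numeric sketch of that bookkeeping with made-up values:

distance_cutoff = [2.0, 2.0, 2.0]  # already multiplied by cutoff_scale
np_del = [1.0, 1.0, 1.0]           # grid spacing per axis

per_axis = [2 * int(distance_cutoff[i] / np_del[i]) + 1 for i in range(3)]
nr_neighbours = per_axis[0] * per_axis[1] * per_axis[2] - 1
# nr_shells stays None for anisotropic counts, disabling the fast algorithm
nr_shells = (per_axis[0] - 1) // 2 if len(set(per_axis)) == 1 else None
print(per_axis, nr_neighbours, nr_shells)  # [5, 5, 5] 124 2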
Example 8
def similarityscreening_main(parser):
    """Main control function for the similarity screening procedure.

    Args:
        parser: (of class ManipulateAggregates.collection.read.SectionlessConfigParser)
            contains information about the config file. Defines the methods
            "get_str", "get_int", "get_float" and "get_boolean" to get the
            appropriate data type.
    """
    set_procname(PROCNAME)
    gets = parser.get_str
    geti = parser.get_int
    getf = parser.get_float
    getb = parser.get_boolean
    do_calculate = not (getb("config_check"))

    # value for progress reports
    if geti("progress") not in [0, 1, 2]:
        raise ValueError(
            'Wrong value for parameter "progress" given. Must be 0,1 or 2.')
    progress = geti("progress")
    # check whether partitioning over nodes was switched on
    if not gets("partition") == "1/1":
        raise ValueError(
            "Parallelization unequal 1/1 not suported for similarity screening."
        )

    mol1 = read_from_file(gets("geometry1"), ff=None)
    mol2 = read_from_file(gets("geometry2"), ff=None)
    nr_ats1 = mol1.obmol.NumAtoms()
    nr_ats2 = mol2.obmol.NumAtoms()

    obmol = prepare_molecules(mol1, mol2, align=getb("prealign"))

    std_map = maagbel.StdMapStringString()
    # add the appropriate configuration parameters to the std::map<std::string,std::string>
    std_map["ffname"] = gets("forcefield")

    # try to find the chosen force field
    if gets("forcefield").lower() not in ["uff", "mmff94", "gaff", "ghemical"]:
        raise ValueError(
            'Wrong force field given. Only "uff", "mmff94", "gaff" and "ghemical" will be accepted.'
        )
    temp_ff = maagbel.OBForceField.FindType(gets("forcefield").lower())
    if temp_ff is None:
        raise RuntimeError(
            "Somehow there was an error loading the forcefield %s (although it should be known to OpenBabel)."
            % (gets("forcefield").lower()))
    try:
        if getf("energy_cutoff") >= 0:
            if getb("use_ff_units"):
                print(
                    "...using given energy in force field units: %.6f %s (equals %.6f meV)"
                    % (
                        getf("energy_cutoff"),
                        temp_ff.GetUnit(),
                        getf("energy_cutoff") /
                        E_UNIT_CONVERSION[temp_ff.GetUnit()],
                    ))
                std_map["ecutoff"] = str(getf("energy_cutoff"))
            else:
                std_map["ecutoff"] = str(
                    getf("energy_cutoff") *
                    E_UNIT_CONVERSION[temp_ff.GetUnit()])
                print(
                    "...converting given energy cutoff to force field units: %s meV -> %.6f %s"
                    % (
                        gets("energy_cutoff"),
                        getf("energy_cutoff") *
                        E_UNIT_CONVERSION[temp_ff.GetUnit()],
                        temp_ff.GetUnit(),
                    ))
        else:
            std_map["ecutoff"] = str(-100)
    except KeyError as e:
        raise RuntimeError(
            "Unknown unit type '%s' of the chosen force field '%s', cannot convert the energy cutoff in meV to that unit. KeyError was: %s. Known units are: %s"
            % (
                temp_ff.GetUnit(),
                gets("forcefield").lower(),
                e,
                ", ".join([t for t in E_UNIT_CONVERSION]),
            ))
    finally:
        del temp_ff

    postalign = getb("postalign")
    geti("symprec")
    geti("maxscreensteps")

    if not (gets("consider_h1") == "" and
            (gets("consider_h2") in ("", "SAME"))):
        # treat 'consider_h1'
        if len(gets("consider_h1")) != 0:
            try:
                tmp_h1 = list(map(int, gets("consider_h1").split(",")))
            except ValueError as e:
                raise ValueError(
                    "Could not parse consider_h1, must be comma-separated integers: %s"
                    % (e, ))
            if len(tmp_h1) != 0:
                if min(tmp_h1) < 1 or max(tmp_h1) > nr_ats1:
                    raise ValueError(
                        "Indices for consider_h1 must be >=%d and <=%d" %
                        (1, nr_ats1))
        else:
            tmp_h1 = []
        # treat 'consider_h2'
        if gets("consider_h2") == "SAME":
            if gets("geometry1") != gets("geometry2") and len(tmp_h1) != 0:
                raise ValueError(
                    "Can only use 'SAME' for consider_h2 when geometry1 and geometry2 are identical."
                )
            else:
                tmp_h2 = [i + nr_ats1 for i in tmp_h1]
        elif len(gets("consider_h2")) != 0:
            try:
                tmp_h2 = list(
                    map(lambda s: int(s) + nr_ats1,
                        gets("consider_h2").split(",")))
            except ValueError as e:
                raise ValueError(
                    "Could not parse 'consider_h2', must be comma-separated integers: %s"
                    % (e, ))
            if len(tmp_h2) != 0 and (min(tmp_h2) < nr_ats1 + 1
                                     or max(tmp_h2) > nr_ats1 + nr_ats2):
                raise ValueError(
                    "Indices for consider_h2 must be >=%d and <=%d" %
                    (1, nr_ats2))
        else:
            tmp_h2 = []
        important_hs = ",".join(map(str, tmp_h1 + tmp_h2))
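        # "imp-H" holds the 1-based indices of the hydrogens to be considered,
        # with the consider_h2 indices shifted by the number of atoms of
        # geometry1 so that they refer to the combined aggregate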
        std_map["imp-H"] = important_hs

    pgstep = -1
    if getb("pointgroups"):
        if gets("pgstep") == "last":
            pgstep = gets("pgstep")
        elif gets("pgstep") == "first":
            pgstep = 1
        else:
            pgstep = geti("pgstep")
            if pgstep < 0:
                raise ValueError("The given pgstep must be >=0.")
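    # the next two calls only validate these boolean options; they are re-read
    # via getb wherever _get_pg is called below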
    getb("subgroups")
    getb("exclude_c1")
    if getb("pgwrite"):
        pgfilename = gets("pgfile")
    else:
        pgfilename = None
    if len(gets("pgregex")) != 0:
        pgregex = re.compile(gets("pgregex"))
    else:
        pgregex = None
    if pgstep != -1 and (len(gets("pgregex")) == 0 and not getb("pgwrite")):
        pgstep = -1
        print(
            "WARNING: pgwrite is False and no pgregex given -> pointgroups will not be determined in step "
            + gets("pgstep"),
            file=sys.stderr,
        )

    if not do_calculate:
        return

    # to avoid segfaults, define some bogus input parameters that would normally be
    # given via the command-line
    in_out_options = maagbel.OBConversion()
    in_out_options.SetInFormat("nul")
    in_out_options.SetOutFormat("nul")

    # create a new OBAggregate instance that can contain a single aggregate and
    # that will walk through all the minima that were found. Each of these geometries
    # will be added to obmol as a new conformer so that the OBOp SimSearch can
    # perform its screening duty
    saveobmol = maagbel.OBAggregate(obmol)  # copy constructor
    # remove all conformer information from the working copy
    obmol.DeleteConformers(0, obmol.NumConformers() - 1)
    if obmol.NumConformers() != 0:
        raise RuntimeError(
            "Could not clear conformer information, still %d left." %
            (obmol.NumConformers()))

    tempmol = maagbel.OBAggregate(saveobmol)
    sameff = True

    gzipped = gets("minima_file_load").endswith(".gz")
    minima_file = gets("minima_file_load")
    if gzipped:
        if not os.path.exists(gets("minima_file_load")) and os.path.exists(
                gets("minima_file_load")[0:-3]):
            print(
                "WARNING: could not find the gzipped minima file but found a non-gzipped one. Will use the latter.",
                file=sys.stderr,
            )
            catproc = Popen(["cat", gets("minima_file_load")[0:-3]],
                            stdout=PIPE)
            minima_file = gets("minima_file_load")[0:-3]
            gzipped = False
        else:
            catproc = Popen(["zcat", gets("minima_file_load")], stdout=PIPE)
            gzipped = True
    else:
        catproc = Popen(["cat", gets("minima_file_load")], stdout=PIPE)
        minima_file = gets("minima_file_load")
        gzipped = False
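    # pipeline: (z)cat the minima file, cut out columns 5-7 (the three angles)
    # with awk and collapse consecutive duplicates with uniq; the result is
    # only used to check the sort order of the angles below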
    awkproc = Popen(["awk", '{print $5 " " $6 " " $7}'],
                    stdin=catproc.stdout,
                    stdout=PIPE)
    uniqproc = Popen(["uniq"], stdin=awkproc.stdout, stdout=PIPE)
    f = uniqproc.stdout

    # angles should be in a monotonically nondecreasing order
    angles = [
        tuple(map(float,
                  line.rstrip().split()[0:3]))
        for line in (tobasestring(bl) for bl in f)
        if not line.startswith("#") and len(line.split()) == 3
    ]
    if angles != sorted(angles):
        print(
            "WARNING: minima file was not in sorted order with respect to the angles.",
            file=sys.stderr,
        )
        print(
            "         Beware that results might change slightly if the order is changed.",
            file=sys.stderr,
        )
    del angles

    f.close()
    catproc.wait()
    awkproc.wait()
    uniqproc.wait()

    old_angles = (-float("inf"), -float("inf"), -float("inf"))
    ang = [0.0, 0.0, 0.0]  # current angles
    disp = [0.0, 0.0, 0.0]  # current displacement
    if progress > 0:
        print("...adding minima geometries to data structure...")
    printcount = 0
    if gzipped:
        f = gziplines(minima_file)
    else:
        f = open(minima_file, "r")
    transfunc = tempmol.TranslatePart
    rotfunc = tempmol.RotatePart
    coordfunc = tempmol.GetCoordinates
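    # every non-comment line of the minima file is expected to hold the
    # displacement vector in columns 2-4 and the three angles in columns 5-7
    # (column 1 is not used here)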
    for line in f:
        if not line.startswith("#"):
            linevals = line.rstrip().split()
            disp = list(map(float, linevals[1:4]))
            pos_disp = double_array(disp)
            neg_disp = double_array([-v for v in disp])
            ang = tuple(map(float, linevals[4:7]))
            if ang != old_angles:
                if progress > 0 and printcount % 10 == 0:
                    print(
                        ERASE_LINE +
                        "...re-creating aggregate with new angles: (%8.2f,%8.2f,%8.2f)..."
                        % ang + CURSOR_UP_ONE)
                    printcount = 0
                printcount += 1
                tempmol.Assign(saveobmol)
                # attribute access is comparatively slow in Python, so the
                # bound methods were cached in local variables above
                a1, a2, a3 = ang
                old_angles = ang
                rotfunc(0, 1, a1)
                rotfunc(0, 2, a2)
                rotfunc(0, 3, a3)
            transfunc(0, pos_disp)
            # actually deep-copy the new coordinates to avoid segfaults
            obmol.AddConformer(coordfunc(), True)
            transfunc(0, neg_disp)
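            # translating back by -disp restores the purely rotated geometry so
            # that only the next displacement has to be applied for further
            # lines sharing the same angles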
        else:
            l = line.split()
            if len(l) >= 3 and l[1] == "FF:":
                print(
                    "\n...determining force field used to create the minima file %s..."
                    % (gets("minima_file_load")))
                if l[2].lower() != gets("forcefield").lower():
                    print(
                        "...old force field '%s' is not the same as the current one '%s'..."
                        % (l[2].lower(), gets("forcefield").lower()))
                    sameff = False
                else:
                    print(
                        "...minima file was created using the current force field..."
                    )
                    sameff = True

    f.close()
    if progress > 0:
        print()

    print("...%d aggregates have been processed..." % (obmol.NumConformers()))
    if obmol.NumConformers() <= 0:
        print(
            "\n...not a single conformer was processed, hence we're done...\n")
        return

    # force maagbel to be verbose if detailed progress reports were requested
    if progress == 1:
        std_map["verbose"] = "true"

    simscreen = maagbel.OBOp.FindType("simscreen")

    prescreen = False
    screenstring = ""
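    # prescreen records whether at least one pre-screening filter is active:
    # symprec >= 0 enables the symmetry filter, energy_cutoff > 0 the energy
    # filter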
    if geti("symprec") >= 0:
        if prescreen:
            screenstring += "and "
        prescreen = True
        screenstring += "symmetry "
        std_map["prec"] = str(geti("symprec"))
        # align all aggregates with their centers to (0,0,0) and their third and second
        # main axes to the x axis and the y axis, respectively, to improve symmetry
        # screening success
        std_map["ssalign"] = "b"
    if getf("energy_cutoff") > 0:
        if prescreen:
            screenstring += "and "
        screenstring += "energy "
        prescreen = True
    else:
        std_map.erase("ecutoff")

    step = 0
    if pgstep == step:
        nr_aggs = obmol.NumConformers()
        obmol = _get_pg(
            obmol,
            saveobmol,
            getb("subgroups"),
            not (getb("exclude_c1")),
            pgfilename,
            progress,
            postalign,
            pgregex,
        )
        if obmol.NumConformers() > nr_aggs:
            raise RuntimeError(
                "Number of conformers increased (%d -> %d) during pointgroup screening."
                % (nr_aggs, obmol.NumConformers()))
    if prescreen:
        nr_aggs = obmol.NumConformers()
        step += 1
        # First, only sort out those aggregates that do not pass the energy and symmetry
        # filter.
        if progress > 0:
            print("\n...starting " + screenstring + "pre-screening...\n")
        # perform the pre-screening
        success = simscreen.Do(obmol, "", std_map, in_out_options)
        if obmol.NumConformers() > nr_aggs:
            raise RuntimeError(
                "Number of conformers increased (%d -> %d) during symmetry screening."
                % (nr_aggs, obmol.NumConformers()))
        if not success:
            raise RuntimeError(
                "Error executing the SimScreen OBOp in OpenBabel.")
        if progress > 0:
            print("...%d aggregates passed %sfilter...\n\n" %
                  (obmol.NumConformers(), screenstring))
        # energy and symmetry screening have already been performed if they were desired
        # so do not do that again
        std_map.erase("ecutoff")
        std_map.erase("ssalign")
        std_map.erase("prec")
    else:
        print("\n...skipping energy and symmetry pre-screening...\n")
    if prescreen and pgstep == step:
        nr_aggs = obmol.NumConformers()
        obmol = _get_pg(
            obmol,
            saveobmol,
            getb("subgroups"),
            not (getb("exclude_c1")),
            pgfilename,
            progress,
            postalign,
            pgregex,
        )
        if obmol.NumConformers() > nr_aggs:
            raise RuntimeError(
                "Number of conformers increased (%d -> %d) during pointgroup screening."
                % (nr_aggs, obmol.NumConformers()))

    success = True
    maxstep = geti("maxscreensteps")
    # screen until no more than nr_geometries aggregates are left
    rmsd = getf("rmsd_min")
    rmsdstep = getf("rmsd_step")
    maxagg = geti("nr_geometries")
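    # each iteration screens with the current RMSD cutoff and then raises it by
    # rmsd_step, so the screening becomes progressively coarser until at most
    # nr_geometries aggregates remain or maxscreensteps is reached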
    while success and obmol.NumConformers() > maxagg and step < maxstep:
        step += 1
        nr_aggs = obmol.NumConformers()
        std_map["rcutoff"] = str(rmsd)
        success = simscreen.Do(obmol, "", std_map, in_out_options)
        if obmol.NumConformers() > nr_aggs and success:
            raise RuntimeError(
                "Number of conformers increased (%d -> %d) during screening step %d."
                % (nr_aggs, obmol.NumConformers(), step))
        if progress > 0:
            print(
                "...%d aggregates passed screening step %d at rmsd %f...\n\n" %
                (obmol.NumConformers(), step, rmsd))
        if pgstep == "last":
            nr_aggs = obmol.NumConformers()
            obmol = _get_pg(
                obmol,
                saveobmol,
                getb("subgroups"),
                not (getb("exclude_c1")),
                pgfilename,
                progress,
                postalign,
                pgregex,
            )
            if obmol.NumConformers() > nr_aggs:
                raise RuntimeError(
                    "Number of conformers increased (%d -> %d) during pointgroup screening."
                    % (nr_aggs, obmol.NumConformers()))
        rmsd += rmsdstep
        if pgstep == step:
            nr_aggs = obmol.NumConformers()
            obmol = _get_pg(
                obmol,
                saveobmol,
                getb("subgroups"),
                not (getb("exclude_c1")),
                pgfilename,
                progress,
                postalign,
                pgregex,
            )
            if obmol.NumConformers() > nr_aggs:
                raise RuntimeError(
                    "Number of conformers increased (%d -> %d) during pointgroup screening."
                    % (nr_aggs, obmol.NumConformers()))

    if not success:
        raise RuntimeError("Error executing the SimScreen OBOp in OpenBabel.")
    if step >= maxstep:
        print(
            "WARNING: maximum number of similarity screening steps reached",
            file=sys.stderr,
        )
    if success:
        if progress > 0:
            print("...%d aggregates passed screening..." %
                  (obmol.NumConformers()))

        # write all conformers that passed the filter to file
        if progress > 0:
            print("...writing %d aggregates to file %s..." %
                  (obmol.NumConformers(), gets("screened_xyz")))
        writefile = pybel.Outputfile("xyz",
                                     gets("screened_xyz"),
                                     overwrite=True)
        pybelmol = pybel.Molecule(obmol)
        nr_conformers = obmol.NumConformers()
        commentfunc = obmol.SetTitle
        setconffunc = obmol.SetConformer
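        # post-alignment (analogous to the ssalign option above): move each
        # conformer's center to the origin and align its third and second main
        # axes with the x and y axes before writing it out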
        if postalign:
            alignfunc = obmol.Align
            aligncenter = double_array([0.0, 0.0, 0.0])
            alignaxis1 = double_array([1.0, 0.0, 0.0])
            alignaxis2 = double_array([0.0, 1.0, 0.0])
        for conf in range(nr_conformers):
            commentfunc("Conformer %d/%d" % (conf + 1, nr_conformers))
            setconffunc(conf)
            if postalign:
                alignfunc(aligncenter, alignaxis1, alignaxis2)
            writefile.write(pybelmol)
        writefile.close()
        if pgstep == "last":
            nr_aggs = obmol.NumConformers()
            obmol = _get_pg(
                obmol,
                saveobmol,
                getb("subgroups"),
                not (getb("exclude_c1")),
                pgfilename,
                progress,
                postalign,
                pgregex,
            )
            if obmol.NumConformers() > nr_aggs:
                raise RuntimeError(
                    "Number of conformers increased (%d -> %d) during pointgroup screening."
                    % (nr_aggs, obmol.NumConformers()))