def main():
    """Remove the nodes named in ``--doomed_nodes`` from the pool.

    The nodes are looked up by name, the root node is rejected and the user
    has to confirm. The selected nodes are then locked, their directories
    are deleted and the pool is reloaded. Afterwards alpha is re-determined
    (``None`` if fewer than two partition nodes remain, otherwise according
    to ``--methodalphas``), the removal is recorded in ``pool.history``
    together with the previous pool size and alpha, the pool is saved and
    ``zgf_cleanup.main()`` is run.

    Raises:
        SystemExit: if ``--doomed_nodes`` is missing, a name does not match
            exactly one node, the root node was selected, or the user
            declines the confirmation.
        AssertionError: if not every selected node could be locked.
        Exception: if ``--methodalphas`` names an unknown method.
    """
    options = options_desc.parse_args(sys.argv)[0]

    #TODO put somehow into Options, e.g. min_value=1 or required=True
    if not options.doomed_nodes:
        sys.exit("Option --doomed_nodes is required.")

    pool = Pool()
    old_pool_size = len(pool)
    old_alpha = pool.alpha

    doomed_nodes = NodeList()
    #TODO: maybe this code should go into ZIBMolPy.ui
    for name in options.doomed_nodes.split(","):
        found = [n for n in pool if n.name == name]
        if len(found) != 1:
            sys.exit("Coult not find node '%s'"%(name))
        doomed_nodes.append(found[0])

    for n in doomed_nodes:
        if n == pool.root:
            sys.exit("Node %s is the root. Removal not allowed."%(n.name))
        #TODO why should we forbid removal of nodes that have children?

    if not userinput("The selected node(s) will be removed permanently. Continue?", "bool"):
        sys.exit("Quit by user.")

    # The locking must not live inside an assert statement: with "python -O"
    # asserts are stripped and the directories below would be deleted
    # without holding any lock.
    locked = doomed_nodes.multilock()
    if len(locked) != len(doomed_nodes):
        raise AssertionError("Could not lock all nodes selected for removal.")

    for n in doomed_nodes:
        print("Removing directory: "+n.dir)
        shutil.rmtree(n.dir)

    pool.reload_nodes()

    #TODO: this code-block also exists in zgf_create_node
    if len(pool.where("isa_partition")) < 2:
        pool.alpha = None
    elif options.methodalphas == "theta":
        pool.alpha = zgf_create_nodes.calc_alpha_theta(pool)
    elif options.methodalphas == "user":
        pool.alpha = userinput("Please enter a value for alpha", "float")
    else:
        raise(Exception("Method unkown: "+options.methodalphas))

    pool.history.append({
        'removed_nodes': [(n.name, n.state) for n in doomed_nodes],
        'size': old_pool_size,
        'alpha': old_alpha,
        'timestamp': datetime.now(),
    })
    pool.save()

    #TODO: deal with analysis dir and dependencies
    zgf_cleanup.main()
def main():
    """Discard the solvent from the pdb and trr files of sampled nodes.

    Nodes in state 'converged' or 'refined' are processed (plus
    'not-converged' when ``--ignore_convergence`` is set); the root node is
    never touched. The user has to confirm, then all selected nodes are
    locked, ``discard_solvent`` is applied to each node's "pdb" and "trr"
    data, and the nodes are unlocked again.

    Errors raised while discarding are printed as a traceback and not
    re-raised.

    Raises:
        SystemExit: if there is nothing to do or the user declines.
        AssertionError: if not every selected node could be locked.
    """
    options = options_desc.parse_args(sys.argv)[0]

    pool = Pool()

    choice = "state in ('converged', 'refined')"
    if options.ignore_convergence:
        choice = "state in ('converged', 'not-converged', 'refined')"

    # we won't touch the root ("not ==" is kept on purpose: it only relies
    # on the node's equality comparison)
    needy_nodes = NodeList([n for n in pool.where(choice) if not n == pool.root])

    if not len(needy_nodes):
        sys.exit("Nothing to do.")

    if not userinput("Once the solvent has been removed, further refinement of the pool is not possible. This includes the generation of unrestrained transition nodes! Continue?", "bool"):
        sys.exit("Quit by user.")

    # make sure we lock ALL nodes; done outside of an assert statement so
    # that the locks are also taken when asserts are stripped ("python -O")
    locked = needy_nodes.multilock()
    if len(locked) != len(needy_nodes):
        raise AssertionError("Could not lock all nodes.")

    try:
        try:
            for n in needy_nodes:
                discard_solvent(n, "pdb")
                discard_solvent(n, "trr")
        finally:
            # release the locks even if discarding failed half-way
            for n in needy_nodes:
                n.unlock()
    except Exception:
        # best effort: report the problem, but let KeyboardInterrupt and
        # SystemExit propagate
        traceback.print_exc()
def main(argv=None):
    """Refine or extend sampled nodes, then re-run setup, grompp and cleanup.

    1. Every 'converged' node is clustered with ``kmeans(k=2)``; if the two
       means are further apart than 2.0 the node is refined, either
       automatically (``--refine_all``) or after asking the user.
    2. Every 'not-converged' node is refined (``--refine_all``), extended
       (``--extend_all``) or handled according to the user's choice.

    Finally all nodes locked at the start are saved and unlocked and
    ``zgf_setup_nodes``, ``zgf_grompp`` and ``zgf_cleanup`` are run.

    Args:
        argv: command line arguments; defaults to ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv
    options = options_desc.parse_args(argv)[0]

    assert(not(options.refine_all and options.extend_all))

    pool = Pool()
    needy_nodes = pool.where("isa_partition and is_sampled").multilock()

    # 1. Trying to detect fake convergence
    for n in pool.where("state == 'converged'"):
        means = kmeans(n.trajectory, k=2)
        d = (means[0] - means[1]).norm2()
        #TODO decide upon threshold (per coordinate?)
        if d > 2.0 and (options.refine_all or userinput("%s has converged but appears to have a bimodal distribution.\nDo you want to refine?"%n.name, "bool")):
            refine(n, options)

    # 2. Dealing with not-converged nodes
    for n in pool.where("state == 'not-converged'"):
        # Decide the action up front. Previously "choice" was only bound
        # when the user was asked, so --extend_all ended in a NameError.
        if options.refine_all:
            choice = "r"
        elif options.extend_all:
            choice = "e"
        else:
            choice = userchoice("%s has not converged. What do you want to do?"%n.name, ['_refine', '_extend', '_ignore'])

        if choice == "r":
            refine(n, options)
        elif choice == "e":
            extend(n)
        # any other answer (e.g. "i" for ignore) leaves the node untouched

    for n in needy_nodes:
        n.save()
        n.unlock()

    zgf_setup_nodes.main()
    zgf_grompp.main()
    zgf_cleanup.main()
def main():
    """Merge two consecutive moleculetypes of a topology into one.

    The input topology is preprocessed (local includes only) and parsed. A
    pair of consecutive molecules is a merge candidate if both have
    ``mols == 1`` and both names occur exactly once in the molecule list.
    With several candidates the user picks the index of the first
    molecule. The merged topology is written to
    ``options.output_topology``.

    Raises:
        SystemExit: with status 1 if no mergeable pair exists.
    """
    options = options_desc.parse_args(sys.argv)[0]

    assert (path.exists(options.input_topology))

    print("Preprocessing (only local includes): %s ..." % options.input_topology)
    rawdata = preprocess(options.input_topology, includedirs=[])  #only local includes

    print("\nParsing...")
    top = Topology(rawdata)

    print("The topology contains:")
    for m in top.molecules:
        print("  %d molecule(s) of the moleculetype '%s'" % (m.mols, m.name))

    #find candidates for molecules to merge
    candidates = []
    for i in range(len(top.molecules) - 1):
        #check this and the next molecule
        is_candidate = True
        for m in top.molecules[i:i + 2]:
            is_candidate &= (m.mols == 1)
            uses = [n for n in top.molecules if n.name == m.name]
            is_candidate &= (len(uses) == 1)
        if is_candidate:
            candidates.append(i)

    #pick a candidate
    if len(candidates) == 0:
        print("Topology contains no mergable moleculetypes - abort.")
        sys.exit(1)
    elif len(candidates) == 1:
        mt_index1 = candidates[0]
        print("Topology contains only one mergable pair of moleculetypes.")
    else:
        msg = "Only two consecutively molecultypes with mol=1 can be merged.\n"
        msg += "Choose index of first moleculetype.\n"
        for i in candidates:
            msg += "%d: %s\n" % (i, top.molecules[i].name)
        mt_index1 = userinput(msg, "int", condition="x in " + repr(candidates))

    #print choosen moleculetypes
    mt_name1 = top.molecules[mt_index1].name
    mt_name2 = top.molecules[mt_index1 + 1].name
    print("Merging moleculetype '%s' with '%s'." % (mt_name1, mt_name2))

    merge_moleculetypes(top, mt_name1, mt_name2)

    print("")
    print("The merged topology contains:")
    for m in top.molecules:
        print("  %d molecule(s) of the moleculetype '%s'" % (m.mols, m.name))

    top_out_fn = options.output_topology
    print("Writting merged topology to " + top_out_fn)
    # the context manager closes the file even if serialising fails
    with open(top_out_fn, "w") as f:
        f.write(top.write())
    print("DONE")
def main():
    """Strip the solvent from the pdb and trr data of all eligible nodes.

    Eligible are nodes in state 'converged' or 'refined' (and
    'not-converged' if ``--ignore_convergence`` is given), excluding the
    root node. After user confirmation the nodes are locked, processed
    with ``discard_solvent`` for "pdb" and "trr", and unlocked.

    A failure while discarding is reported via a printed traceback; it is
    not re-raised.

    Raises:
        SystemExit: if no node is eligible or the user aborts.
        AssertionError: if locking did not cover every eligible node.
    """
    options = options_desc.parse_args(sys.argv)[0]

    pool = Pool()

    if options.ignore_convergence:
        choice = "state in ('converged', 'not-converged', 'refined')"
    else:
        choice = "state in ('converged', 'refined')"

    # we won't touch the root
    needy_nodes = NodeList([
        n for n in pool.where(choice) if not n == pool.root
    ])

    if not len(needy_nodes):
        sys.exit("Nothing to do.")

    if not userinput(
            "Once the solvent has been removed, further refinement of the pool is not possible. This includes the generation of unrestrained transition nodes! Continue?",
            "bool"):
        sys.exit("Quit by user.")

    # make sure we lock ALL nodes. The call is kept out of an assert
    # statement, which would be removed (together with the locking) under
    # "python -O".
    locked_nodes = needy_nodes.multilock()
    if len(needy_nodes) != len(locked_nodes):
        raise AssertionError("Could not lock all nodes.")

    try:
        try:
            for n in needy_nodes:
                discard_solvent(n, "pdb")
                discard_solvent(n, "trr")
        finally:
            # never leave nodes locked behind, even after an error
            for n in needy_nodes:
                n.unlock()
    except Exception:
        # report and carry on; KeyboardInterrupt/SystemExit still propagate
        traceback.print_exc()
def main():
    """Merge a pair of neighbouring moleculetypes in a topology file.

    Reads ``options.input_topology``, lists its molecules, determines which
    neighbouring pairs may be merged (both with ``mols == 1`` and both
    names used only once), lets the user choose if there is more than one
    such pair, merges the pair and writes the result to
    ``options.output_topology``.
    """
    (options, args) = options_desc.parse_args(sys.argv)

    in_fn = options.input_topology
    assert(path.exists(in_fn))

    print("Preprocessing (only local includes): %s ..."%in_fn)
    rawdata = preprocess(in_fn, includedirs=[]) #only local includes

    print("\nParsing...")
    top = Topology(rawdata)

    print("The topology contains:")
    for mol in top.molecules:
        print("  %d molecule(s) of the moleculetype '%s'"%(mol.mols, mol.name))

    # a pair (idx, idx+1) qualifies if each member is a single molecule
    # whose name appears exactly once in the topology
    candidates = []
    for idx in range(len(top.molecules)-1):
        pair = top.molecules[idx:idx+2]
        qualifies = all(
            mol.mols == 1
            and len([other for other in top.molecules if other.name == mol.name]) == 1
            for mol in pair
        )
        if qualifies:
            candidates.append(idx)

    # pick a candidate
    if not candidates:
        print("Topology contains no mergable moleculetypes - abort.")
        sys.exit(1)

    if len(candidates) == 1:
        mt_index1 = candidates[0]
        print("Topology contains only one mergable pair of moleculetypes.")
    else:
        listing = "".join("%d: %s\n"%(idx, top.molecules[idx].name) for idx in candidates)
        msg = ("Only two consecutively molecultypes with mol=1 can be merged.\n"
               + "Choose index of first moleculetype.\n"
               + listing)
        mt_index1 = userinput(msg, "int", condition="x in "+repr(candidates))

    # report the chosen moleculetypes
    first_name = top.molecules[mt_index1].name
    second_name = top.molecules[mt_index1+1].name
    print("Merging moleculetype '%s' with '%s'."%(first_name, second_name))

    merge_moleculetypes(top, first_name, second_name)

    print("")
    print("The merged topology contains:")
    for mol in top.molecules:
        print("  %d molecule(s) of the moleculetype '%s'"%(mol.mols, mol.name))

    out_fn = options.output_topology
    print("Writting merged topology to "+out_fn)
    out_file = open(out_fn, "w")
    out_file.write(top.write())
    out_file.close()
    print("DONE")
def main():
    """Solvate all 'grompp-able' nodes and prepare them for minimization.

    All nodes in state 'grompp-able' are locked. Depending on
    ``options.solv_model`` a solvent template box and a topology include
    file are selected. ``genbox`` builds the solvated box, ``update_tops``
    adds the solvent include to the topologies (skipped for acetonitrile,
    which has no include file here), and each node is set to
    'em-grompp-able' and passed to ``zgf_grompp.call_grompp`` before being
    unlocked.

    Returns:
        "Quit by user." if the user declines the acetonitrile warning,
        otherwise None.

    Raises:
        AssertionError: if not every node could be locked.
        SystemExit: if ``options.solv_model`` is not a known model.
    """
    options = options_desc.parse_args(sys.argv)[0]

    pool = Pool()
    needy_nodes = pool.where("state == 'grompp-able'")

    # make sure we lock ALL nodes; kept out of an assert statement so the
    # locks are still taken when asserts are stripped ("python -O")
    locked = needy_nodes.multilock()
    if len(locked) != len(needy_nodes):
        raise AssertionError("Could not lock all grompp-able nodes.")

    # solvent model -> (template box, topology include file)
    solvent_files = {
        "tip3p": ("spc216.gro", "tip3p.itp"),
        "tip4p": ("tip4p.gro", "tip4p.itp"),
        "tip4pew": ("tip4p.gro", "tip4pew.itp"),
        "tip5": ("tip5p.gro", "tip5p.itp"),
        "spc": ("spc216.gro", "spc.itp"),
        "spce": ("spc216.gro", "spce.itp"),
    }

    solv_fn = None
    if options.solv_model in solvent_files:
        solv_box, solv_fn = solvent_files[options.solv_model]
    elif options.solv_model == "acetonitrile":
        # TODO one might change this one to "custom" and let user enter name of template box
        solv_box = "acetonitrile.pdb"
        msg = "Topology update for acetonitrile is not supported. Proceed?"
        if not userinput(msg, "bool"):
            for n in needy_nodes:
                n.unlock()
            return("Quit by user.")
    else:
        # Previously an unknown model surfaced later as a NameError on
        # solv_box while the nodes were still locked.
        for n in needy_nodes:
            n.unlock()
        sys.exit("Unknown solvent model: %s" % options.solv_model)

    # determine maximum length of linears, if any
    max_linear = query_linear_length(pool)

    # make box and fill with solvent
    genbox(pool, max_linear, options.bt, (options.box_x, options.box_y, options.box_z), solv_box)

    # update topology files (add solvent model and ions includes)
    if not (options.solv_model == "acetonitrile"):
        update_tops(pool, solv_fn)

    for n in needy_nodes:
        n.state = "em-grompp-able"
        # re-grompp to get a tpr for energy minimization
        zgf_grompp.call_grompp(n, mdp_file=options.grompp, final_state="em-mdrun-able")
        n.unlock()
def main():
    """Fill the simulation box of every 'grompp-able' node with solvent.

    Locks the 'grompp-able' nodes, picks the solvent template box (and,
    except for acetonitrile, the topology include) for
    ``options.solv_model``, runs ``genbox`` and ``update_tops`` on the
    pool, then moves each node to 'em-grompp-able', calls
    ``zgf_grompp.call_grompp`` on it and unlocks it.

    Returns "Quit by user." if the acetonitrile warning is declined.
    """
    options = options_desc.parse_args(sys.argv)[0]

    pool = Pool()
    needy_nodes = pool.where("state == 'grompp-able'")
    assert(len(needy_nodes) == len(needy_nodes.multilock())) # make sure we lock ALL nodes

    # water models: name -> (template box, topology include)
    water_models = {
        "tip3p":   ("spc216.gro", "tip3p.itp"),
        "tip4p":   ("tip4p.gro",  "tip4p.itp"),
        "tip4pew": ("tip4p.gro",  "tip4pew.itp"),
        "tip5":    ("tip5p.gro",  "tip5p.itp"),
        "spc":     ("spc216.gro", "spc.itp"),
        "spce":    ("spc216.gro", "spce.itp"),
    }

    model = options.solv_model
    if model in water_models:
        (solv_box, solv_fn) = water_models[model]
    elif model == "acetonitrile":
        # TODO one might change this one to "custom" and let user enter name of template box
        solv_box = "acetonitrile.pdb"
        proceed = userinput("Topology update for acetonitrile is not supported. Proceed?", "bool")
        if not proceed:
            for node in needy_nodes:
                node.unlock()
            return("Quit by user.")

    # determine maximum length of linears, if any
    max_linear = query_linear_length(pool)

    # make box and fill with solvent
    box_dimensions = (options.box_x, options.box_y, options.box_z)
    genbox(pool, max_linear, options.bt, box_dimensions, solv_box)

    # update topology files (add solvent model and ions includes)
    if not (model == "acetonitrile"):
        update_tops(pool, solv_fn)

    for node in needy_nodes:
        node.state = "em-grompp-able"
        # re-grompp to get a tpr for energy minimization
        zgf_grompp.call_grompp(node, mdp_file=options.grompp, final_state="em-mdrun-able")
        node.unlock()
def main(argv=None):
    """Handle falsely converged and not-converged nodes of the pool.

    'converged' nodes whose two k-means centres are more than 2.0 apart
    are refined (automatically with ``--refine_all``, otherwise after a
    prompt). 'not-converged' nodes are refined or extended according to
    ``--refine_all`` / ``--extend_all`` or the user's answer. Afterwards
    the nodes locked at the beginning are saved and unlocked, and
    ``zgf_setup_nodes``, ``zgf_grompp`` and ``zgf_cleanup`` are executed.

    Args:
        argv: argument list to parse; ``sys.argv`` is used when omitted.
    """
    if argv is None:
        argv = sys.argv
    options = options_desc.parse_args(argv)[0]

    assert (not (options.refine_all and options.extend_all))

    pool = Pool()
    needy_nodes = pool.where("isa_partition and is_sampled").multilock()

    # 1. Trying to detect fake convergence
    for n in pool.where("state == 'converged'"):
        means = kmeans(n.trajectory, k=2)
        d = (means[0] - means[1]).norm2()
        #TODO decide upon threshold (per coordinate?)
        if (d > 2.0 and (options.refine_all or userinput(
                "%s has converged but appears to have a bimodal distribution.\nDo you want to refine?"
                % n.name, "bool"))):
            refine(n, options)

    # 2. Dealing with not-converged nodes
    for n in pool.where("state == 'not-converged'"):
        # "choice" has to be bound on every path: with --extend_all the
        # user is not asked, and comparing an unbound name used to raise
        # a NameError.
        choice = None
        if not (options.refine_all or options.extend_all):
            choice = userchoice(
                "%s has not converged. What do you want to do?" % n.name,
                ['_refine', '_extend', '_ignore'])

        if options.refine_all or choice == "r":
            refine(n, options)
        elif options.extend_all or choice == "e":
            extend(n)
        elif choice == "i":
            continue

    for n in needy_nodes:
        n.save()
        n.unlock()

    zgf_setup_nodes.main()
    zgf_grompp.main()
    zgf_cleanup.main()
def main():
    """Check the input files, repair the mdp file and create a new pool.

    Steps, in order: resolve the input file names, assert that they exist
    and that the index file defines the group MOI, check the mdp options
    (writing a ``*_fixed.mdp`` copy if options had to be added), offer to
    subsample a large presampling trajectory, optionally balance the
    linear coordinates, and finally create the pool together with its
    root node and the ``analysis`` directory.
    """
    options = options_desc.parse_args(sys.argv)[0]

    # a common base name replaces all individual input file names
    if options.common_filename:
        base = options.common_filename
        options.molecule = base + ".pdb"
        options.presampling = base + ".trr"
        options.internals = base + ".int"
        options.grompp = base + ".mdp"
        options.topology = base + ".top"
        options.index = base + ".ndx"

    print("Options:\n%s\n" % pformat(eval(str(options))))

    # TODO: what if there is no index-file? (make_ndx)
    required_files = (
        options.molecule,
        options.presampling,
        options.internals,
        options.grompp,
        options.topology,
        options.index,
    )
    for required_fn in required_files:
        assert path.exists(required_fn)
    assert "MOI" in gromacs.read_index_file(options.index), "group MOI should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    # options we cannot fix
    # TODO drop options.temperature and get temperature directly from mdp file... ask again if temperature is above 310K
    for mdp_temperature in re.findall("[0-9]+", mdp_options["ref_t"]):
        assert int(mdp_temperature) == options.temperature, "temperature in mdp file does not match ZIBgridfree temperature"

    # options we can fix
    mdp_options_dirty = False  # if set, a new mdp-file will be written
    required_mdp_options = {"dihre": "yes", "dihre_fc": "1", "disre": "simple", "disre_fc": "1"}
    for (key, required_value) in required_mdp_options.items():
        if not mdp_options.has_key(key):
            mdp_options[key] = required_value
            mdp_options_dirty = True
            continue
        # check, if we would overwrite something
        assert mdp_options[key] == required_value

    if not mdp_options.has_key("energygrps"):
        mdp_options["energygrps"] = "MOI"
        mdp_options_dirty = True
    else:
        energy_groups = [str(egrp) for egrp in re.findall("[\S]+", mdp_options["energygrps"])]
        assert "MOI" in energy_groups, "group MOI should be among energygrps in mdp file"

    # nstxout and nstenergy have to agree; a missing one is copied over
    has_xout = mdp_options.has_key("nstxout")
    has_energy = mdp_options.has_key("nstenergy")
    if has_xout and has_energy:
        assert mdp_options["nstxout"] == mdp_options["nstenergy"], "nstxout should equal nstenergy"
    elif has_xout:
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif has_energy:
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True

    if int(mdp_options["nsteps"]) > 1e6:
        answer = userinput("Number of MD-steps?", "int", default=int(mdp_options["nsteps"]))
        mdp_options["nsteps"] = str(answer)

    # create a fixed mdp-file
    if mdp_options_dirty:
        print("Creating copy of mdp-file and adding missing options.")
        fixed_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        mdp_file = open(fixed_fn, "w")
        mdp_file.write("; Generated by zgf_create_pool\n")
        for item in mdp_options.items():
            mdp_file.write("%s = %s\n" % item)
        mdp_file.write("; EOF\n")
        mdp_file.close()
        options.grompp = fixed_fn

    # check if subsampling is reasonable
    if os.path.getsize(options.presampling) > 100e6:  # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        dt = trr.first_frame.next().t - trr.first_frame.t
        trr.close()
        print("Presampling timestep is %.2f ps" % dt)
        # TODO: maybe calculate subsampling factor individually, or ask?
        if dt < 10 and userinput("Subsample presampling trajectory by a tenth?", "bool"):  # picoseconds
            tenth_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
            check_call(["trjconv", "-f", options.presampling, "-o", tenth_fn, "-skip", "10"])
            options.presampling = tenth_fn

    # balance linears
    if options.balance_linears:
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)

        new_coord_list = []
        for coord in old_converter:
            if isinstance(coord, LinearCoordinate):
                # TODO: is this a good way to determine new_weight and new_offset???
                balanced_weight = coord.weight / sqrt(2 * frames.var().getcoord(coord))
                balanced_offset = coord.offset + frames.mean().getcoord(coord)
                new_coord_list.append(LinearCoordinate(*coord.atoms, label=coord.label, weight=balanced_weight, offset=balanced_offset))
            else:
                # we do not work on other Coordinate-Types
                new_coord_list.append(coord)

        new_converter = Converter(coord_list=new_coord_list)

        assert old_converter.filename.endswith(".int")
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: " + options.internals)
        int_file = open(options.internals, "w")
        int_file.write(new_converter.serialize())
        int_file.close()
        assert len(Converter(options.internals)) == len(new_coord_list)  # try parsing

    # Finally: Create root-node and pool
    pool = Pool()
    if len(pool) != 0:
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = options.temperature
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save()  # save pool for the first time...

    # ... then we can save the first node...
    root_node = Node()
    root_node.state = "refined"
    root_node.save()  # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, root_node.dir), root_node.trr_fn)
    os.symlink(os.path.relpath(options.molecule, root_node.dir), root_node.pdb_fn)

    pool.root_name = root_node.name
    pool.save()  # ... now we have to save the pool again.

    if not path.exists("analysis"):
        os.mkdir("analysis")
def main():
    """Validate the input files, fix up the mdp file and create a new pool.

    The sampling temperature is read from ``ref_t`` in the mdp file; all
    numbers found there have to agree. Critical mdp options must either be
    absent or already carry the required value, non-critical ones are
    added if missing. If anything was set, a ``*_fixed.mdp`` copy is
    written and used from then on. A large presampling trajectory with a
    small timestep may be subsampled via ``trjconv``; linear coordinates
    may be balanced (``options.balance_linears``). Finally the pool, its
    root node (state "refined", with symlinks to the presampling
    trajectory and the molecule file) and the ``analysis`` directory are
    created.

    Raises:
        SystemExit: if the user declines a prompt, a critical mdp option
            has a conflicting value, or a pool already exists.
        AssertionError: if an input file is missing or a consistency check
            fails.
    """
    options = options_desc.parse_args(sys.argv)[0]

    if options.common_filename:
        options.molecule = options.common_filename+".pdb"
        options.presampling = options.common_filename+".trr"
        options.internals = options.common_filename+".int"
        options.grompp = options.common_filename+".mdp"
        options.topology = options.common_filename+".top"
        options.index = options.common_filename+".ndx"

    # NOTE(review): eval() on the options' string form; the input is the
    # script's own command line, left as is.
    print("Options:\n%s\n"%pformat(eval(str(options))))

    assert(path.exists(options.molecule))
    assert(path.exists(options.presampling))
    assert(path.exists(options.internals))
    assert(path.exists(options.grompp))
    assert(path.exists(options.topology))

    #TODO: what if there is no index-file? (make_ndx)
    assert(path.exists(options.index))
    assert('moi' in gromacs.read_index_file(options.index)), "group 'MOI' should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    temperatures = re.findall("[0-9]+", mdp_options["ref_t"])
    assert(len(set(temperatures)) == 1), "temperature definition in mdp file is ambiguous"
    temperature = temperatures[0] # get sampling temperature from mdp file

    if int(temperature) > 310:
        if not userinput("Your sampling temperature is set to %s K. Continue?"%temperature, "bool"):
            sys.exit("Quit by user.")

    # options we can fix
    mdp_options_dirty = False #if set, a new mdp-file will be written

    # the value of the following options need to be fixed
    critical_mdp_options = {"dihre":"yes", "dihre_fc":"1", "disre":"simple", "disre_fc":"1", "gen_temp":temperature}
    for (k, v) in critical_mdp_options.items():
        if mdp_options.has_key(k) and mdp_options[k].strip() != v:
            print("Error. I do not want to use '%s' for option '%s' ('%s' required). Please fix your mdp file."%(mdp_options[k].strip(), k, v))
            sys.exit("Quitting.")
        else:
            # NOTE(review): this also marks the options dirty when the key
            # already had the required value, so a fixed copy is always
            # written - confirm that this is intended.
            mdp_options[k] = v
            mdp_options_dirty = True

    # the value of the following options does not matter, but they should be there
    noncritical_mdp_options = {"tcoupl":"no", "pcoupl":"no", "gen_vel":"no", "gen_seed":"-1"}
    for (k, v) in noncritical_mdp_options.items():
        if not mdp_options.has_key(k):
            mdp_options[k] = v
            mdp_options_dirty = True

    # warn if 'moi' is not listed among the energy groups (or none are defined)
    moi_missing = (not mdp_options.has_key("energygrps")
                   or "moi" not in [str(egrp) for egrp in re.findall(r'[\S]+', mdp_options["energygrps"])])
    if moi_missing:
        if not userinput("'MOI' is not defined as an energy group in your mdp file. Maybe you have forgotten to define proper 'energygrps'. Continue?", "bool"):
            sys.exit("Quit by user.")

    a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
    if a and not b:
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif b and not a:
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True
    elif b and a:
        assert(mdp_options["nstxout"] == mdp_options["nstenergy"]), "nstxout should equal nstenergy"

    if int(mdp_options["nsteps"]) > 1e6:
        msg = "Number of MD-steps?"
        mdp_options["nsteps"] = str(userinput(msg, "int", default=int(mdp_options["nsteps"])))

    # create a fixed mdp-file
    if mdp_options_dirty:
        print("Creating copy of mdp-file and adding missing options.")
        out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        # the context manager closes the file even if writing fails
        with open(out_fn, "w") as f:
            f.write("; Generated by zgf_create_pool\n")
            for i in sorted(mdp_options.items()):
                f.write("%s = %s\n"%i)
            f.write("; EOF\n")
        options.grompp = out_fn

    # check if subsampling is reasonable
    if os.path.getsize(options.presampling) > 100e6: # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        try:
            dt = trr.first_frame.next().t - trr.first_frame.t
        finally:
            # do not leak the trajectory handle if reading a frame fails
            trr.close()
        print("Presampling timestep is %.2f ps"%dt)
        if dt < 10: # picoseconds
            #TODO: maybe calculate subsampling factor individually, or ask?
            msg = "Subsample presampling trajectory by a tenth?"
            if userinput(msg, "bool"):
                out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
                cmd = ["trjconv", "-f", options.presampling, "-o", out_fn, "-skip", "10"]
                check_call(cmd)
                options.presampling = out_fn

    # balance linears
    if options.balance_linears:
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)
        new_coord_list = []
        for c in old_converter:
            if not isinstance(c, LinearCoordinate):
                new_coord_list.append(c)
                continue # we do not work on other Coordinate-Types
            #TODO: is this a good way to determine new_weight and new_offset???
            new_weight = c.weight / sqrt(2*frames.var().getcoord(c))
            new_offset = c.offset + frames.mean().getcoord(c)
            new_coord = LinearCoordinate(*c.atoms, label=c.label, weight=new_weight, offset=new_offset)
            new_coord_list.append(new_coord)
        new_converter = Converter(coord_list=new_coord_list)

        assert(old_converter.filename.endswith(".int"))
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: "+options.internals)
        with open(options.internals, "w") as f:
            f.write(new_converter.serialize())
        assert(len(Converter(options.internals)) == len(new_coord_list)) #try parsing

    # Finally: Create root-node and pool
    pool = Pool()
    if len(pool) != 0:
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = int(temperature)
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save() # save pool for the first time...

    # ... then we can save the first node...
    node0 = Node()
    node0.state = "refined"
    node0.save() # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
    os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

    pool.root_name = node0.name
    pool.save() #... now we have to save the pool again.

    if not path.exists("analysis"):
        os.mkdir("analysis")
def main(argv=None):
    """Create new nodes below a parent node and update alpha and phi fits.

    Frames of the parent's trajectory are chosen with the method given by
    ``options.methodnodes``; one node in state "creating-a-partition" is
    appended to the pool per chosen frame. Alpha is then obtained
    (``options.methodalphas``), a history entry is recorded and the pool
    saved, the phi fit is performed (``options.methodphifit``), the new
    nodes are set to "created" and saved, and ``zgf_cleanup.main()`` runs.

    Args:
        argv: argument list to parse; ``sys.argv`` is used when omitted.
    """
    if argv is None:
        argv = sys.argv
    options = options_desc.parse_args(argv)[0]

    print("Options:\n%s\n"%pformat(eval(str(options))))

    if options.random_seed:
        # using numpy-random because python-random differs beetween 32 and 64 bit
        np.random.seed(hash(options.random_seed))

    pool = Pool()
    old_pool_size = len(pool)
    print("pool %s" % (pool,))

    # locate the parent node
    if options.parent_node == "root":
        parent = pool.root
    else:
        matches = [node for node in pool if node.name == options.parent_node]
        assert(len(matches) == 1)
        parent = matches[0]

    print("### Generate nodes: %s ###" % options.methodnodes)
    # node generators are wrapped so that only the selected one is evaluated
    node_generators = {
        "kmeans":   lambda: mknodes_kmeans(parent, options.numnodes),
        "equidist": lambda: mknodes_equidist(parent, options.numnodes),
        "maxdist":  lambda: mknodes_maxdist(parent, options.numnodes),
        "all":      lambda: mknodes_all(parent),
    }
    if options.methodnodes not in node_generators:
        raise(Exception("Method unknown: "+options.methodnodes))
    chosen_idx = node_generators[options.methodnodes]()

    chosen_idx.sort() # makes preview-trajectory easier to understand
    if options.write_preview:
        write_node_preview(pool, parent, chosen_idx)

    for frame_num in chosen_idx:
        new_node = Node()
        new_node.parent_frame_num = frame_num
        new_node.parent = parent
        new_node.state = "creating-a-partition" # will be set to "created" at end of script
        new_node.extensions_counter = 0
        new_node.extensions_max = options.ext_max
        new_node.extensions_length = options.ext_length
        new_node.sampling_length = options.sampling_length
        new_node.internals = parent.trajectory.getframe(frame_num)
        pool.append(new_node)

    print("\n### Obtain alpha: %s ###" % options.methodalphas)
    old_alpha = pool.alpha
    if options.methodalphas == "theta":
        pool.alpha = calc_alpha_theta(pool)
    elif options.methodalphas == "user":
        pool.alpha = userinput("Please enter a value for alpha", "float")
    else:
        raise(Exception("Method unknown: "+options.methodalphas))

    history_entry = {
        'refined_node': (parent.name, parent.state),
        'size': old_pool_size,
        'alpha': old_alpha,
        'timestamp': datetime.now(),
    }
    pool.history.append(history_entry)
    pool.save() # alpha might have changed

    print("\n### Obtain phi fit: %s ###" % options.methodphifit)
    phifit_methods = {
        "harmonic": do_phifit_harmonic,
        "switch": do_phifit_switch,
        "leastsq": do_phifit_leastsq,
    }
    if options.methodphifit not in phifit_methods:
        raise(Exception("Method unkown: "+options.methodphifit))
    phifit_methods[options.methodphifit](pool)

    for new_node in pool.where("state == 'creating-a-partition'"):
        new_node.state = "created"
        new_node.save()
        print("saving " + str(new_node))

    zgf_cleanup.main()
def main():
    """Build the symmetrized S matrix of the pool and cluster it with PCCA+.

    Outline of what the code below does:

    1. Run ``zgf_cleanup.main()``, select the partition nodes (without
       'mdrun-failed' nodes if ``options.ignore_failed`` is set) and lock
       them. Exit if any of them has no 'weight_direct' observable.
    2. Obtain the S matrix via ``cache_matrix``, symmetrize it, store the
       corrected matrix in ``pool.s_corr_mat_fn`` and the corrected weight
       on every node; the nodes are then saved and unlocked.
    3. Compute the eigenvalues of the corrected matrix, sort them in
       descending order and propose the number of clusters from the
       largest eigenvalue-weighted gap (the user is asked unless
       ``options.auto_cluster`` is set).
    4. Orthogonalize the eigenvectors, run ``cluster_by_isa`` and, if
       ``options.optimize_chi`` is set, optimize the rotation matrix and
       shift/scale the chi rows of light-weight nodes.
    5. Compute the Q_c matrix, print numerical checks, store chi and Q_c
       matrices, register file dependencies and touch the analysis
       directory.
    6. If ``options.summary`` is set, print a per-cluster summary.

    Intermediate matrices are additionally exported with ``savemat`` when
    ``options.export_matlab`` is set.
    """
    options = options_desc.parse_args(sys.argv)[0]

    zgf_cleanup.main()

    pool = Pool()
    active_nodes = pool.where("isa_partition")
    if(options.ignore_failed):
        active_nodes = pool.where("isa_partition and not state=='mdrun-failed'")

    # NOTE(review): the locking happens inside an assert statement and would
    # be skipped when Python runs with -O.
    assert(len(active_nodes) == len(active_nodes.multilock())) # make sure we lock ALL nodes

    # every active node needs a direct weight before the matrix can be built
    if active_nodes.where("'weight_direct' not in obs"):
        active_nodes.unlock()
        sys.exit("Matrix calculation not possible: Not all of the nodes have been reweighted.")

    print "\n### Getting S matrix ..."
    s_matrix = cache_matrix(pool.s_mat_fn, active_nodes, overwrite=options.overwrite_mat, fast=options.fast_mat)
    register_file_dependency(pool.s_mat_fn, pool.filename)

    node_weights = np.array([node.obs.weight_direct for node in active_nodes])

    print "\n### Symmetrizing S matrix ..."
    (corr_s_matrix, corr_node_weights) = symmetrize(s_matrix, node_weights, correct_weights=True, error=float(options.error))

    # store intermediate results
    register_file_dependency(pool.s_corr_mat_fn, pool.s_mat_fn)

    np.savez(pool.s_corr_mat_fn, matrix=corr_s_matrix, node_names=[n.name for n in active_nodes])

    if options.export_matlab:
        savemat(pool.analysis_dir+"node_weights.mat", {"node_weights":node_weights, "node_weights_corrected":corr_node_weights})
        savemat(pool.analysis_dir+"s_mats.mat", {"s_matrix":s_matrix, "s_matrix_corrected":corr_s_matrix})

    # attach the corrected weight to each node (same order as active_nodes)
    for (n, cw) in zip(active_nodes, corr_node_weights):
        n.obs.weight_corrected = cw

    print "\n### Node weights after symmetrization of S matrix:"
    for n in active_nodes:
        print "%s: initial weight: %f, corrected weight: %f, weight change: %f" % (n.name, n.obs.weight_direct, n.obs.weight_corrected, abs(n.obs.weight_direct - n.obs.weight_corrected))
        n.save()

    # from here on the nodes are only read
    active_nodes.unlock()

    # calculate and sort eigenvalues in descending order
    (eigvalues, eigvectors) = np.linalg.eig(corr_s_matrix)
    argsorted_eigvalues = np.argsort(-eigvalues)
    eigvalues = eigvalues[argsorted_eigvalues]
    eigvectors = eigvectors[:, argsorted_eigvalues]

    # gap between consecutive eigenvalues; the last one gets a gap of 0.0
    gaps = np.abs(eigvalues[1:]-eigvalues[:-1])
    gaps = np.append(gaps, 0.0)
    wgaps = gaps*eigvalues

    print "\n### Sorted eigenvalues of symmetrized S matrix:"
    for (idx, ev, gap, wgap) in zip(range(1, len(eigvalues)+1), eigvalues, gaps, wgaps):
        print "EV%04d: %f, gap to next: %f, EV-weighted gap to next: %f" % (idx, ev, gap, wgap)
    # default cluster count: position of the largest EV-weighted gap.
    # Note that the first message below prints the maximum of the plain
    # gaps next to this count.
    n_clusters = np.argmax(wgaps)+1
    print "\n### Maximum gap %f after top %d eigenvalues." % (np.max(gaps), n_clusters)
    print "### Maximum EV-weighted gap %f after top %d eigenvalues." % (np.max(wgaps), np.argmax(wgaps)+1)
    sys.stdout.flush()
    if not options.auto_cluster:
        n_clusters = userinput("Please enter the number of clusters for PCCA+", "int", "x>0")
    print "### Using %d clusters for PCCA+ ..."%n_clusters

    if options.export_matlab:
        savemat(pool.analysis_dir+"evs.mat", {"evs":eigvectors})

    # orthogonalize and normalize eigenvectors
    eigvectors = orthogonalize(eigvalues, eigvectors, corr_node_weights)

    # perform PCCA+
    # First two return-values "c_f" and "indicator" are not needed
    (chi_matrix, rot_matrix) = cluster_by_isa(eigvectors, n_clusters)[2:]

    if(options.optimize_chi):
        print "\n### Optimizing chi matrix ..."

        # nodes whose corrected weight is at most 5% of the mean count as light-weight
        outliers = 5
        mean_weight = np.mean(corr_node_weights)
        threshold = mean_weight/100*outliers
        print "Light-weight node threshold (%d%% of mean corrected node weight): %.4f."%(outliers, threshold)

        # accumulate nodes for optimization
        edges = np.where(np.max(chi_matrix, axis=1) > 0.9999)[0] # edges of simplex
        heavies = np.where( corr_node_weights > threshold)[0] # heavy-weight nodes
        filtered_eigvectors = eigvectors[ np.union1d(edges, heavies) ]

        # perform the actual optimization
        rot_matrix = opt_soft(filtered_eigvectors, rot_matrix, n_clusters)

        chi_matrix = np.dot(eigvectors[:,:n_clusters], rot_matrix)

        # deal with light-weight nodes: shift and scale
        # (the messages say "Column", the code modifies row i of chi_matrix)
        for i in np.where(corr_node_weights <= threshold)[0]:
            if(i in edges):
                print "Column %d belongs to (potentially dangerous) light-weight node, but its node is a simplex edge."%(i+1)
                continue
            print "Column %d is shifted and scaled."%(i+1)
            col_min = np.min( chi_matrix[i,:] )
            chi_matrix[i,:] -= col_min
            chi_matrix[i,:] /= 1-(n_clusters*col_min)

    # Q_c = inv(rot) * diag(leading eigenvalues) * rot - identity
    qc_matrix = np.dot( np.dot( np.linalg.inv(rot_matrix), np.diag(eigvalues[range(n_clusters)]) ), rot_matrix ) - np.eye(n_clusters)
    # NOTE(review): rot_matrix[0] selects index 0 along the first axis, while
    # the message below calls it the "first column" - confirm which is meant.
    cluster_weights = rot_matrix[0]

    print "\n### Matrix numerics check"
    print "-- Q_c matrix row sums --"
    print np.sum(qc_matrix, axis=1)
    print "-- cluster weights: first column of rot_matrix --"
    print cluster_weights
    print "-- cluster weights: numpy.dot(node_weights, chi_matrix) --"
    print np.dot(corr_node_weights, chi_matrix)
    print "-- chi matrix column max values --"
    print np.max(chi_matrix, axis=0)
    print "-- chi matrix row sums --"
    print np.sum(chi_matrix, axis=1)

    # store final results
    np.savez(pool.chi_mat_fn, matrix=chi_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes])
    np.savez(pool.qc_mat_fn, matrix=qc_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes], weights=cluster_weights)

    if options.export_matlab:
        savemat(pool.analysis_dir+"chi_mat.mat", {"chi_matrix":chi_matrix})
        savemat(pool.analysis_dir+"qc_mat.mat", {"qc_matrix":qc_matrix, "weights":cluster_weights})

    register_file_dependency(pool.chi_mat_fn, pool.s_corr_mat_fn)
    register_file_dependency(pool.qc_mat_fn, pool.s_corr_mat_fn)

    for fn in (pool.s_mat_fn, pool.s_corr_mat_fn):
        register_file_dependency(pool.chi_mat_fn, fn)
        register_file_dependency(pool.qc_mat_fn, fn)

    # touch analysis directory (triggering update in zgf_browser)
    atime = mtime = time.time()
    os.utime(pool.analysis_dir, (atime, mtime))

    # show summary
    if(options.summary):
        print "\n### Preparing cluster summary ..."
        # nodes with a chi value above this threshold count as involved in a cluster
        chi_threshold = 1E-3
        from pprint import pformat

        for i in range(n_clusters):
            involved_nodes = [active_nodes[ni] for ni in np.argwhere(chi_matrix[:,i] > chi_threshold)]
            max_chi_node = active_nodes[ np.argmax(chi_matrix[:,i]) ]
            c_max = []

            # for every internal coordinate: accumulate a histogram over all
            # nodes, weighted by corrected node weight and chi, and record
            # the position of its maximum
            for c in pool.converter:
                coord_range = pool.coord_range(c)
                scale = c.plot_scale
                edges = scale(np.linspace(np.min(coord_range), np.max(coord_range), num=50))
                hist_cluster = np.zeros(edges.size-1)

                for (n, chi) in zip([n for n in active_nodes], chi_matrix[:,i]):
                    samples = scale( n.trajectory.getcoord(c) )
                    hist_node = np.histogram(samples, bins=edges, weights=n.frameweights, normed=True)[0]
                    hist_cluster += n.obs.weight_corrected * hist_node * chi

                c_max.append( scale(np.linspace(np.min(coord_range), np.max(coord_range), num=50))[np.argmax(hist_cluster)] )

            msg = "### Cluster %d (weight=%.4f, #involved nodes=%d, representative='%s'):"%(i+1, cluster_weights[i], len(involved_nodes), max_chi_node.name)
            print "\n"+msg
            print "-- internal coordinates --"
            print "%s"%pformat(["%.2f"%cm for cm in c_max])
            print "-- involved nodes --"
            print "%s"%pformat([n.name for n in involved_nodes])
            print "-"*len(msg)
def main():
    """Validate the input files, fix up the mdp file and create a new pool.

    Steps performed, in order:

    1. Parse the command line; if ``common_filename`` is given, derive all
       input filenames from it.
    2. Check that all input files exist and that the index file defines
       the group 'moi'.
    3. Check and complete the mdp options and write them to a
       ``*_fixed.mdp`` copy.
    4. Optionally subsample a large presampling trajectory via ``trjconv``.
    5. Optionally write a ``*_balanced.int`` file with rescaled linear
       coordinates.
    6. Create the pool and its root node, symlink the presampling
       trajectory and the molecule into the node, and make sure the
       ``analysis`` directory exists.

    Exits via ``sys.exit`` when the user declines a prompt, when a critical
    mdp option has an unacceptable value, or when a pool already exists.
    Raises ``AssertionError`` when an input file is missing or inconsistent.
    """
    options = options_desc.parse_args(sys.argv)[0]

    if options.common_filename:
        options.molecule = options.common_filename + ".pdb"
        options.presampling = options.common_filename + ".trr"
        options.internals = options.common_filename + ".int"
        options.grompp = options.common_filename + ".mdp"
        options.topology = options.common_filename + ".top"
        options.index = options.common_filename + ".ndx"

    # NOTE(review): eval() on str(options) presumably turns the options
    # object's dict-like repr back into a dict for pretty-printing; the
    # content originates from the command line. Flagged, not replaced --
    # confirm against the options class before switching to e.g. vars().
    print("Options:\n%s\n" % pformat(eval(str(options))))

    # Same AssertionError as before, but now naming the missing file.
    assert path.exists(options.molecule), "file not found: %s" % options.molecule
    assert path.exists(options.presampling), "file not found: %s" % options.presampling
    assert path.exists(options.internals), "file not found: %s" % options.internals
    assert path.exists(options.grompp), "file not found: %s" % options.grompp
    assert path.exists(options.topology), "file not found: %s" % options.topology

    #TODO: what if there is no index-file? (make_ndx)
    assert path.exists(options.index), "file not found: %s" % options.index
    assert 'moi' in gromacs.read_index_file(options.index), "group 'MOI' should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    # Only runs of digits are extracted, so all entries of ref_t must be
    # the same integer string.
    temperatures = re.findall("[0-9]+", mdp_options["ref_t"])
    assert len(set(temperatures)) == 1, "temperature definition in mdp file is ambiguous"
    temperature = temperatures[0]  # get sampling temperature from mdp file
    if int(temperature) > 310:
        if not userinput("Your sampling temperature is set to %s K. Continue?" % temperature, "bool"):
            sys.exit("Quit by user.")

    # options we can fix
    mdp_options_dirty = False  # if set, a new mdp-file will be written

    # the value of the following options need to be fixed
    critical_mdp_options = {
        "dihre": "yes",
        "dihre_fc": "1",
        "disre": "simple",
        "disre_fc": "1",
        "gen_temp": temperature,
    }
    for (k, v) in critical_mdp_options.items():
        if k in mdp_options and mdp_options[k].strip() != v:
            print("Error. I do not want to use '%s' for option '%s' ('%s' required). Please fix your mdp file."
                  % (mdp_options[k].strip(), k, v))
            sys.exit("Quitting.")
        else:
            # Also reached when the option is already set correctly, so the
            # dirty flag is always raised here and a fixed copy is always
            # written below (the nsteps change further down relies on that).
            mdp_options[k] = v
            mdp_options_dirty = True

    # the value of the following options does not matter, but they should be there
    noncritical_mdp_options = {
        "tcoupl": "no",
        "pcoupl": "no",
        "gen_vel": "no",
        "gen_seed": "-1",
    }
    for (k, v) in noncritical_mdp_options.items():
        if k not in mdp_options:
            mdp_options[k] = v
            mdp_options_dirty = True

    # Warn if 'moi' is not among the whitespace-separated energy groups
    # (or if no energy groups are defined at all).
    moi_missing = ("energygrps" not in mdp_options
                   or "moi" not in re.findall(r'[\S]+', mdp_options["energygrps"]))
    if moi_missing:
        if not userinput("'MOI' is not defined as an energy group in your mdp file. Maybe you have forgotten to define proper 'energygrps'. Continue?", "bool"):
            sys.exit("Quit by user.")

    # nstxout and nstenergy must agree; a missing one is copied from the other.
    has_nstxout = "nstxout" in mdp_options
    has_nstenergy = "nstenergy" in mdp_options
    if has_nstxout and not has_nstenergy:
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif has_nstenergy and not has_nstxout:
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True
    elif has_nstenergy and has_nstxout:
        assert mdp_options["nstxout"] == mdp_options["nstenergy"], "nstxout should equal nstenergy"

    if int(mdp_options["nsteps"]) > 1e6:
        msg = "Number of MD-steps?"
        mdp_options["nsteps"] = str(userinput(msg, "int", default=int(mdp_options["nsteps"])))

    # create a fixed mdp-file
    if mdp_options_dirty:
        print("Creating copy of mdp-file and adding missing options.")
        out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        # Mode "w" truncates an existing file; the context manager closes
        # the handle even if a write fails.
        with open(out_fn, "w") as f:
            f.write("; Generated by zgf_create_pool\n")
            for item in sorted(mdp_options.items()):
                f.write("%s = %s\n" % item)
            f.write("; EOF\n")
        options.grompp = out_fn

    # check if subsampling is reasonable
    if os.path.getsize(options.presampling) > 100e6:  # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        try:
            # time difference between the first two frames
            dt = trr.first_frame.next().t - trr.first_frame.t
        finally:
            trr.close()
        print("Presampling timestep is %.2f ps" % dt)
        if dt < 10:  # picoseconds
            #TODO: maybe calculate subsampling factor individually, or ask?
            msg = "Subsample presampling trajectory by a tenth?"
            if userinput(msg, "bool"):
                out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
                cmd = ["trjconv", "-f", options.presampling, "-o", out_fn, "-skip", "10"]
                check_call(cmd)
                options.presampling = out_fn

    # balance linears
    if options.balance_linears:
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)
        new_coord_list = []
        for c in old_converter:
            if not isinstance(c, LinearCoordinate):
                new_coord_list.append(c)
                continue  # we do not work on other Coordinate-Types
            #TODO: is this a good way to determine new_weight and new_offset???
            # NOTE(review): frames.var()/frames.mean() are re-evaluated for
            # every linear coordinate; presumably loop-invariant -- confirm
            # before hoisting.
            new_weight = c.weight / sqrt(2 * frames.var().getcoord(c))
            new_offset = c.offset + frames.mean().getcoord(c)
            new_coord = LinearCoordinate(*c.atoms, label=c.label, weight=new_weight, offset=new_offset)
            new_coord_list.append(new_coord)
        new_converter = Converter(coord_list=new_coord_list)

        assert old_converter.filename.endswith(".int")
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: " + options.internals)
        with open(options.internals, "w") as f:
            f.write(new_converter.serialize())
        # try parsing the file we have just written
        assert len(Converter(options.internals)) == len(new_coord_list)

    # Finally: Create root-node and pool
    pool = Pool()
    if len(pool) != 0:
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = int(temperature)
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save()  # save pool for the first time...

    # ... then we can save the first node...
    node0 = Node()
    node0.state = "refined"
    node0.save()  # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
    os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

    pool.root_name = node0.name
    pool.save()  #... now we have to save the pool again.

    if not path.exists("analysis"):
        os.mkdir("analysis")
def main():
    """Compute the S and K matrices of the pool and cluster the nodes.

    Steps performed, in order:

    1. Run ``zgf_cleanup`` and lock all nodes selected by
       ``pool.where("isa_partition")``.
    2. Load (or compute) the S and K matrices via ``cache_matrix`` and
       symmetrize them; store the corrected matrices and write the
       corrected weight into every node. The nodes are unlocked afterwards,
       also when one of these steps fails.
    3. Compute the eigenvalues of the corrected S matrix, sorted in
       descending order, and report the gaps between neighbouring values.
       The number of clusters defaults to the position of the largest
       eigenvalue-weighted gap, unless the user is asked for it.
    4. Orthogonalize the eigenvectors, run ``cluster_by_isa`` and store the
       resulting chi and Q_c matrices together with their file
       dependencies.
    5. Run ``zgf_cleanup`` again.

    Exits via ``sys.exit`` if any of the nodes has no ``weight_direct``
    observable yet.
    """
    options = options_desc.parse_args(sys.argv)[0]

    zgf_cleanup.main()

    pool = Pool()
    active_nodes = pool.where("isa_partition")
    assert(len(active_nodes) == len(active_nodes.multilock()))  # make sure we lock ALL nodes

    # Everything that needs the locks lives inside the try-block, so the
    # nodes are released on success, on sys.exit() and on any error.
    try:
        if active_nodes.where("'weight_direct' not in obs"):
            sys.exit("Matrix calculation not possible: Not all of the nodes have been reweighted.")

        node_names = [n.name for n in active_nodes]

        print("\n### Getting S matrix ...")
        s_matrix = cache_matrix(pool.s_mat_fn, active_nodes, overwrite=options.overwrite_mat)
        register_file_dependency(pool.s_mat_fn, pool.filename)

        print("\n### Getting K matrix ...")
        k_matrix = cache_matrix(pool.k_mat_fn, active_nodes, shift=options.lag_time, overwrite=options.overwrite_mat)
        register_file_dependency(pool.k_mat_fn, pool.filename)

        node_weights = np.array([node.obs.weight_direct for node in active_nodes])

        print("\n### Symmetrizing S matrix ...")
        (corr_s_matrix, corr_node_weights) = symmetrize(s_matrix, node_weights, correct_weights=True, error=float(options.error))

        print("\n### Symmetrizing K matrix ...")
        (corr_k_matrix, corr_node_weights) = symmetrize(k_matrix, corr_node_weights)

        # store intermediate results
        register_file_dependency(pool.s_corr_mat_fn, pool.s_mat_fn)
        register_file_dependency(pool.k_corr_mat_fn, pool.k_mat_fn)

        np.savez(pool.s_corr_mat_fn, matrix=corr_s_matrix, node_names=node_names)
        np.savez(pool.k_corr_mat_fn, matrix=corr_k_matrix, node_names=node_names)

        if options.export_matlab:
            savemat(pool.analysis_dir+"node_weights.mat", {"node_weights":node_weights, "node_weights_corrected":corr_node_weights})
            savemat(pool.analysis_dir+"s_mats.mat", {"s_matrix":s_matrix, "s_matrix_corrected":corr_s_matrix})
            savemat(pool.analysis_dir+"k_mats.mat", {"k_matrix":k_matrix, "k_matrix_corrected":corr_k_matrix})

        for (n, cw) in zip(active_nodes, corr_node_weights):
            n.obs.weight_corrected = cw

        print("\n### Node weights after symmetrization of S matrix:")
        for n in active_nodes:
            print("%s: initial weight: %f, corrected weight: %f, weight change: %f" % (n.name, n.obs.weight_direct, n.obs.weight_corrected, abs(n.obs.weight_direct - n.obs.weight_corrected)))
            n.save()
    finally:
        active_nodes.unlock()

    # calculate and sort eigenvalues in descending order
    (eigvalues, eigvectors) = np.linalg.eig(corr_s_matrix)
    argsorted_eigvalues = np.argsort(-eigvalues)
    eigvalues = eigvalues[argsorted_eigvalues]
    eigvectors = eigvectors[:, argsorted_eigvalues]

    # gaps[i] is the distance between eigenvalue i and i+1; the last
    # eigenvalue has no successor and gets a gap of 0.
    gaps = np.abs(eigvalues[1:]-eigvalues[:-1])
    gaps = np.append(gaps, 0.0)
    wgaps = gaps*eigvalues

    print("\n### Sorted eigenvalues of symmetrized S matrix:")
    for (idx, ev, gap, wgap) in zip(range(1, len(eigvalues)+1), eigvalues, gaps, wgaps):
        print("EV%04d: %f, gap to next: %f, EV-weighted gap to next: %f" % (idx, ev, gap, wgap))

    # The default number of clusters follows the eigenvalue-weighted gap.
    n_clusters = np.argmax(wgaps)+1
    # Report each maximum together with its OWN position; the plain and
    # the weighted gap do not have to peak at the same eigenvalue.
    print("\n### Maximum gap %f after top %d eigenvalues." % (np.max(gaps), np.argmax(gaps)+1))
    print("### Maximum EV-weighted gap %f after top %d eigenvalues." % (np.max(wgaps), np.argmax(wgaps)+1))
    sys.stdout.flush()
    if not options.auto_cluster:
        n_clusters = userinput("Please enter the number of clusters for PCCA+", "int", "x>0")
    print("### Using %d clusters for PCCA+ ..."%n_clusters)

    print("eigenvectors")
    print(eigvectors[:, :n_clusters])

    if options.export_matlab:
        savemat(pool.analysis_dir+"evs.mat", {"evs":eigvectors})

    # orthogonalize and normalize eigenvectors
    eigvectors = orthogonalize(eigvalues, eigvectors, corr_node_weights)

    # perform PCCA+
    # First two return-values "c_f" and "indicator" are not needed
    (chi_matrix, rot_matrix) = cluster_by_isa(eigvectors, n_clusters)[2:]

    #TODO at the moment, K-matrix is not used
    #xi = [] # calculate eigenvalues of Q_c, xi
    #for eigvec in np.transpose(eigvectors)[: n_clusters]:
    #    num = np.dot( np.dot( np.transpose(eigvec), corr_k_matrix ), eigvec )
    #    denom = np.dot( np.dot( np.transpose(eigvec), corr_s_matrix ), eigvec )
    #    xi.append(num/denom-1)

    #print np.diag(xi) #TODO what does this tell us? Marcus-check

    # Q_c = rot^-1 * diag(top eigenvalues) * rot - identity
    qc_matrix = np.dot( np.dot( np.linalg.inv(rot_matrix), np.diag(eigvalues[:n_clusters]) ), rot_matrix ) - np.eye(n_clusters)
    cluster_weights = rot_matrix[0]

    print("Q_c matrix:")
    print(qc_matrix)
    print("Q_c matrix row sums:")
    print(np.sum(qc_matrix, axis=1))
    print("cluster weights (calculated twice for checking):")
    print(cluster_weights)
    print(np.dot(corr_node_weights, chi_matrix))
    print("chi matrix column sums:")
    print(np.sum(chi_matrix, axis=0))
    print("chi matrix row sums:")
    print(np.sum(chi_matrix, axis=1))

    # store final results
    np.savez(pool.chi_mat_fn, matrix=chi_matrix, n_clusters=n_clusters, node_names=node_names)
    np.savez(pool.qc_mat_fn, matrix=qc_matrix, n_clusters=n_clusters, node_names=node_names, weights=cluster_weights)

    if options.export_matlab:
        savemat(pool.analysis_dir+"chi_mat.mat", {"chi_matrix":chi_matrix})
        savemat(pool.analysis_dir+"qc_mat.mat", {"qc_matrix":qc_matrix, "weights":cluster_weights})

    register_file_dependency(pool.chi_mat_fn, pool.s_corr_mat_fn)
    register_file_dependency(pool.qc_mat_fn, pool.s_corr_mat_fn)
    for fn in (pool.s_mat_fn, pool.s_corr_mat_fn, pool.k_mat_fn, pool.k_corr_mat_fn):
        register_file_dependency(pool.chi_mat_fn, fn)
        register_file_dependency(pool.qc_mat_fn, fn)

    zgf_cleanup.main()