def write_node_preview(pool, parent, chosen_idx):
    assert(chosen_idx == sorted(chosen_idx))
    print "chosen_idx", chosen_idx

    trr_out_tmp_fn = mktemp(suffix='.trr')
    trr_out_tmp = open(trr_out_tmp_fn, "wb")

    trr_in = TrrFile(parent.trr_fn)
    curr_frame = trr_in.first_frame
    for i in chosen_idx:
        # frames are numbered consecutively - step forward until frame i is reached
        for dummy in range(i - curr_frame.number):
            curr_frame = curr_frame.next()
        assert(curr_frame.number == i)
        trr_out_tmp.write(curr_frame.raw_data)
    trr_in.close()
    trr_out_tmp.close()

    node_preview_fn = "node_preview_from_" + parent.name + ".pdb"
    cmd = ["trjconv", "-f", trr_out_tmp_fn, "-o", node_preview_fn, "-s", parent.pdb_fn, "-n", pool.ndx_fn]
    p = Popen(cmd, stdin=PIPE)
    p.communicate(input="MOI\n")
    assert(p.wait() == 0)

    os.remove(trr_out_tmp_fn)
    print "Node preview (MOI only) written to file: %s" % node_preview_fn

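# The Popen/communicate pattern above recurs in most of these snippets: GROMACS
# tools such as trjconv and eneconv prompt interactively for a group or value,
# and the answer is piped in via stdin. A minimal wrapper - a sketch only; the
# helper name run_interactive is hypothetical and not part of the original code:
from subprocess import Popen, PIPE

def run_interactive(cmd, answer):
    """Run cmd and feed `answer` to its interactive prompt; abort on failure."""
    p = Popen(cmd, stdin=PIPE)
    p.communicate(input=answer)
    assert(p.wait() == 0)
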
def get_merged_edr(node):
    # get list of edr-files
    edr_fnames = sorted([node.dir + "/" + fn for fn in os.listdir(node.dir) if re.match("[^#].+run\d+\.edr", fn)])
    assert(len(edr_fnames) == node.extensions_max + 1)

    # find out about trr time step
    trr = TrrFile(node.trr_fn)
    dt = trr.first_frame.next().t - trr.first_frame.t
    trr.close()
    # dt is sometimes noisy in the final digits (three decimals = femtosecond resolution, which is enough)
    dt = np.around(dt, decimals=3)

    time_offset = node.sampling_length + dt

    for edr_fn in edr_fnames[1:]:
        # adapt edr starting times
        cmd = ["eneconv", "-f", edr_fn, "-o", edr_fn, "-settime"]
        print("Calling: " + (" ".join(cmd)))
        p = Popen(cmd, stdin=PIPE)
        p.communicate(input=(str(time_offset) + "\n"))
        assert(p.wait() == 0)
        time_offset += node.extensions_length + dt

    # concatenate edr files with adapted starting times
    cmd = ["eneconv", "-f"] + edr_fnames + ["-o", node.dir + "/ener.edr"]
    print("Calling: " + (" ".join(cmd)))
    p = Popen(cmd)
    retcode = p.wait()
    assert(retcode == 0)

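# The start-time bookkeeping above is easy to get wrong by one step. A minimal
# sketch of the same arithmetic with standalone parameters - the function and
# its parameters (sampling_length, extensions_length, dt, n_runs) are
# hypothetical, not part of the Node API:
def edr_start_times(sampling_length, extensions_length, dt, n_runs):
    """Start time fed to eneconv for each continuation run (run 0 keeps its times)."""
    times = []
    t = sampling_length + dt  # first continuation starts one step after the sampling run
    for dummy in range(n_runs - 1):
        times.append(t)
        t += extensions_length + dt
    return times

# e.g. edr_start_times(100.0, 25.0, 0.5, 3) -> [100.5, 126.0]
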
def main():
    options = options_desc.parse_args(sys.argv)[0]

    pool = Pool()
    needy_nodes = pool.where("state == 'merge-able'").multilock()

    if(len(needy_nodes) == 0):
        return

    # find out about trr time step
    dt = 0
    nodeDir = needy_nodes[0].dir.split('/')[-1]
    for fn in os.listdir(needy_nodes[0].dir):
        if re.match("^" + nodeDir + ".+run\d+\.trr", fn):
            trr = TrrFile(needy_nodes[0].dir + "/" + fn)
            dt = trr.first_frame.next().t - trr.first_frame.t
            trr.close()
            break

    # dt is sometimes noisy in the final digits (three decimals = femtosecond resolution, which is enough)
    dt = np.around(dt, decimals=3)

    for n in needy_nodes:
        if(options.trr):
            # merge sampling trajectories
            trr_fns = sorted([fn for fn in os.listdir(n.dir) if re.match("[^#].+run\d+\.trr", fn)])
            cmd = ["trjcat", "-f"]
            cmd += trr_fns
            cmd += ["-o", "../../" + n.trr_fn, "-cat"]
            print("Calling: %s" % " ".join(cmd))
            check_call(cmd, cwd=n.dir)

        if(options.edr):
            # merge edr files
            # get list of edr-files
            edr_fnames = sorted([n.dir + "/" + fn for fn in os.listdir(n.dir) if re.match("[^#].+run\d+\.edr", fn)])
            assert(len(edr_fnames) == n.extensions_counter + 1)
            assert(len(edr_fnames) == n.extensions_max + 1)

            time_offset = n.sampling_length + dt

            for edr_fn in edr_fnames[1:]:
                # adapt edr starting times
                cmd = ["eneconv", "-f", edr_fn, "-o", edr_fn, "-settime"]
                print("Calling: " + (" ".join(cmd)))
                p = Popen(cmd, stdin=PIPE)
                p.communicate(input=(str(time_offset) + "\n"))
                assert(p.wait() == 0)
                time_offset += n.extensions_length + dt

            # concatenate edr files with adapted starting times
            cmd = ["eneconv", "-f"] + edr_fnames + ["-o", n.dir + "/ener.edr"]
            print("Calling: " + (" ".join(cmd)))
            p = Popen(cmd)
            retcode = p.wait()
            assert(retcode == 0)

    needy_nodes.unlock()

def read_trajectory(self, fn):
    """
    Reads a trr-trajectory, resolves periodic boundary conditions
    with L{PbcResolver} and calculates internal coordinates.

    @param fn: filename of a gromacs trr trajectory.
    @return: L{InternalArray}
    """
    required_atoms = set(sum([c.atoms for c in self], ()))
    atoms_start = min(required_atoms)
    atoms_end = max(required_atoms) + 1

    f_trr = TrrFile(fn)
    (frames_x, frames_box) = f_trr.read_frames(atoms_start, atoms_end, read_boxes=True)
    f_trr.close()

    pbc = PbcResolver(frames_box)

    def dx_provider(atom1, atom2):
        ai = frames_x[:, atom1 - atoms_start, :]
        aj = frames_x[:, atom2 - atoms_start, :]
        return(pbc.rvec_sub(ai, aj))

    array = np.column_stack([c.from_externals(dx_provider) for c in self])
    return(InternalArray(self, array))

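# PbcResolver's implementation lives elsewhere in the codebase; as a rough
# illustration, rvec_sub presumably applies the minimum image convention to
# each displacement vector. A minimal numpy sketch for the special case of
# orthorhombic (rectangular) boxes - an assumption; the real resolver would
# also have to handle triclinic boxes:
import numpy as np

def minimum_image_sub(ai, aj, box_diag):
    """Shortest displacement ai - aj under periodic boundaries.

    ai, aj: coordinates of shape (n_frames, 3); box_diag: box edge lengths, shape (3,).
    """
    d = ai - aj
    # shift each component by whole box lengths so it is at most half a box away
    return d - box_diag * np.round(d / box_diag)
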
def main():
    if(len(sys.argv) != 3):
        print("Takes a trr-trajectory and converts it into a mat-file for matlab.")
        print("Usage: trr2mat.py <trr-input-file> <mat-output-file>")
        sys.exit(1)

    trr_fn = sys.argv[1]
    mat_fn = sys.argv[2]

    print("Opening: %s" % trr_fn)
    f = TrrFile(trr_fn)
    data = f.read_frames()
    f.close()
    print("Loaded trr-file with shape: " + str(data.shape))

    print("Writing: %s" % mat_fn)
    savemat(mat_fn, {"trr": data})

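# For reference, the resulting mat-file can be read back with scipy.io.loadmat
# (the savemat used above presumably comes from scipy.io as well). A sketch;
# the helper name and the filename "output.mat" are placeholders:
def check_mat_roundtrip():
    from scipy.io import loadmat
    data = loadmat("output.mat")["trr"]  # same key as passed to savemat
    print("mat-file contains trr data with shape: " + str(data.shape))
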
def extract_frames(pool):
    needy_nodes = pool.where("state == 'created'")

    # we want to scan through a parent-trr only once - saves time
    parents = set([n.parent for n in needy_nodes])
    for p in parents:
        childs = [n for n in needy_nodes if n.parent == p]
        childs.sort(key=lambda x: x.parent_frame_num)
        trr_in = TrrFile(p.trr_fn)
        frame = trr_in.first_frame
        for n in childs:
            # children are sorted by frame number - walk forward to the wanted frame
            for dummy in range(n.parent_frame_num - frame.number):
                frame = frame.next()
            assert(frame.number == n.parent_frame_num)
            trr_tmp_fn = mktemp(suffix='.trr')
            trr_tmp = open(trr_tmp_fn, "wb")
            trr_tmp.write(frame.raw_data)
            trr_tmp.close()
            cmd = ["trjconv", "-f", trr_tmp_fn, "-o", n.pdb_fn, "-s", n.parent.pdb_fn]
            proc = Popen(cmd, stdin=PIPE)
            proc.communicate(input="System\n")
            assert(proc.wait() == 0)
            os.remove(trr_tmp_fn)
        trr_in.close()

    # Check if the right frames were extracted.
    # In principle, PDB coordinates should have a precision of 1e-4 nm,
    # because they are given in Angström with three decimal places.
    for n in needy_nodes:
        a = pool.converter.read_pdb(n.pdb_fn)
        d = np.max(np.abs(n.internals.array - a.array))
        print n.name + ": pdb vs internals deviation: %.2e" % d
        assert(1e-2 > d)

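# The forward-seek idiom above recurs throughout these snippets. Using only the
# TrrFile frame API visible here (.number and .next()), it could be factored
# into a helper - a sketch, not part of the original code:
def seek_frame(frame, target_number):
    """Advance through the linked trr frames until target_number is reached.

    Only works forwards; frames are assumed to be numbered consecutively.
    """
    while frame.number < target_number:
        frame = frame.next()
    assert frame.number == target_number
    return frame
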
def main():
    options = options_desc.parse_args(sys.argv)[0]

    zgf_cleanup.main()

    pool = Pool()
    npz_file = np.load(pool.chi_mat_fn)
    chi_matrix = npz_file['matrix']
    node_names = npz_file['node_names']
    n_clusters = npz_file['n_clusters']
    active_nodes = [Node(nn) for nn in node_names]

    # create and open dest_files, initialize counters for statistics
    dest_filenames = [pool.analysis_dir + "cluster%d.trr" % (c + 1) for c in range(n_clusters)]
    dest_files = [open(fn, "wb") for fn in dest_filenames]
    dest_frame_counters = np.zeros(n_clusters)

    # For each active node...
    for (i, n) in enumerate(active_nodes):
        # ... find the clusters to which it belongs (might be more than one)...
        belonging_clusters = np.argwhere(chi_matrix[i] > options.node_threshold)

        # ... and find all typical frames of this node.
        #TODO not an optimal solution... discuss
        # by default, we take every frame with above-average weight
        frame_threshold = options.frame_threshold * 2 * np.mean(n.frameweights)
        typical_frame_nums = np.argwhere(n.frameweights > frame_threshold)

        # Go through the node's trajectory ...
        trr_in = TrrFile(n.trr_fn)
        curr_frame = trr_in.first_frame
        for frame_num in typical_frame_nums:
            # ...stop at each typical frame...
            while(frame_num != curr_frame.number):
                curr_frame = curr_frame.next()
            assert(curr_frame.number == frame_num)
            #... and copy it into the dest_file of each belonging cluster.
            for c in belonging_clusters:
                dest_files[c].write(curr_frame.raw_data)
                dest_frame_counters[c] += 1
        trr_in.close()  # close source file

    # close dest_files
    for f in dest_files:
        f.close()
    del(dest_files)

    # desolvate cluster-trajectories 'in-place'
    if(not options.write_sol):
        for dest_fn in dest_filenames:
            tmp_fn = mktemp(suffix='.trr', dir=pool.analysis_dir)
            os.rename(dest_fn, tmp_fn)  # works as both files are in same dir
            cmd = ["trjconv", "-f", tmp_fn, "-o", dest_fn, "-n", pool.ndx_fn]
            p = Popen(cmd, stdin=PIPE)
            p.communicate(input="MOI\n")
            assert(p.wait() == 0)
            os.remove(tmp_fn)

    # register dependencies
    for fn in dest_filenames:
        register_file_dependency(fn, pool.chi_mat_fn)

    # check number of written frames
    sys.stdout.write("Checking lengths of written trajectories... ")
    for i in range(n_clusters):
        f = TrrFile(dest_filenames[i])
        assert(f.count_frames() == dest_frame_counters[i])
        f.close()
    print("done.")

    # output statistics
    print "\n### Extraction summary ###\nnode threshold: %1.1f, frame threshold: %1.1f" % (options.node_threshold, options.frame_threshold)
    print "Cluster trajectories were written to %s:" % pool.analysis_dir
    for (c, f) in enumerate(dest_frame_counters):
        print "cluster%d.trr [%d frames] from node(s):" % (c + 1, f)
        print list(np.argwhere(chi_matrix[:, c] > options.node_threshold).flat)

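# A toy illustration of the frame-weight thresholding above; all numbers are
# made up, and frame_threshold = 0.5 is just an example value (with it, the
# cutoff is exactly the mean weight, i.e. "above-average frames"):
import numpy as np
frameweights = np.array([0.2, 1.5, 0.9, 2.4])
threshold = 0.5 * 2 * np.mean(frameweights)  # = 1.25
print list(np.argwhere(frameweights > threshold).flat)  # -> [1, 3]
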
def main():
    options = options_desc.parse_args(sys.argv)[0]

    if options.common_filename:
        options.molecule = options.common_filename + ".pdb"
        options.presampling = options.common_filename + ".trr"
        options.internals = options.common_filename + ".int"
        options.grompp = options.common_filename + ".mdp"
        options.topology = options.common_filename + ".top"
        options.index = options.common_filename + ".ndx"

    print("Options:\n%s\n" % pformat(eval(str(options))))

    assert path.exists(options.molecule)
    assert path.exists(options.presampling)
    assert path.exists(options.internals)
    assert path.exists(options.grompp)
    assert path.exists(options.topology)
    # TODO: what if there is no index-file? (make_ndx)
    assert path.exists(options.index)
    assert "MOI" in gromacs.read_index_file(options.index), "group MOI should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    # options we cannot fix
    for ref_t in re.findall("[0-9]+", mdp_options["ref_t"]):
        assert int(ref_t) == options.temperature, "temperature in mdp file does not match ZIBgridfree temperature"
        # TODO drop options.temperature and get temperature directly from mdp file... ask again if temperature is above 310K

    # options we can fix
    mdp_options_dirty = False  # if set, a new mdp-file will be written
    required_mdp_options = {"dihre": "yes", "dihre_fc": "1", "disre": "simple", "disre_fc": "1"}
    for (k, v) in required_mdp_options.items():
        if mdp_options.has_key(k):
            assert mdp_options[k] == v  # check, if we would overwrite something
        else:
            mdp_options[k] = v
            mdp_options_dirty = True

    if mdp_options.has_key("energygrps"):
        assert "MOI" in [str(egrp) for egrp in re.findall("[\S]+", mdp_options["energygrps"])], "group MOI should be among energygrps in mdp file"
    else:
        mdp_options["energygrps"] = "MOI"
        mdp_options_dirty = True

    a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
    if a and not b:
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif b and not a:
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True
    elif b and a:
        assert mdp_options["nstxout"] == mdp_options["nstenergy"], "nstxout should equal nstenergy"

    if int(mdp_options["nsteps"]) > 1e6:
        msg = "Number of MD-steps?"
        mdp_options["nsteps"] = str(userinput(msg, "int", default=int(mdp_options["nsteps"])))

    # create a fixed mdp-file
    if mdp_options_dirty:
        print("Creating copy of mdp-file and adding missing options.")
        out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        f = open(out_fn, "w")
        f.write("; Generated by zgf_create_pool\n")
        for i in mdp_options.items():
            f.write("%s = %s\n" % i)
        f.write("; EOF\n")
        f.close()
        options.grompp = out_fn

    # check if subsampling is reasonable
    if os.path.getsize(options.presampling) > 100e6:  # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        dt = trr.first_frame.next().t - trr.first_frame.t
        trr.close()
        print("Presampling timestep is %.2f ps" % dt)
        if dt < 10:  # picoseconds
            # TODO: maybe calculate subsampling factor individually, or ask?
            msg = "Subsample presampling trajectory by a tenth?"
            if userinput(msg, "bool"):
                out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
                cmd = ["trjconv", "-f", options.presampling, "-o", out_fn, "-skip", "10"]
                check_call(cmd)
                options.presampling = out_fn

    # balance linears
    if options.balance_linears:
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)
        new_coord_list = []
        for c in old_converter:
            if not isinstance(c, LinearCoordinate):
                new_coord_list.append(c)
                continue  # we do not work on other Coordinate-Types
            # TODO: is this a good way to determine new_weight and new_offset???
            new_weight = c.weight / sqrt(2 * frames.var().getcoord(c))
            new_offset = c.offset + frames.mean().getcoord(c)
            new_coord = LinearCoordinate(*c.atoms, label=c.label, weight=new_weight, offset=new_offset)
            new_coord_list.append(new_coord)
        new_converter = Converter(coord_list=new_coord_list)

        assert old_converter.filename.endswith(".int")
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: " + options.internals)
        f = open(options.internals, "w")
        f.write(new_converter.serialize())
        f.close()
        assert len(Converter(options.internals)) == len(new_coord_list)  # try parsing

    # Finally: Create root-node and pool
    pool = Pool()
    if len(pool) != 0:
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = options.temperature
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save()  # save pool for the first time...

    # ... then we can save the first node...
    node0 = Node()
    node0.state = "refined"
    node0.save()  # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
    os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

    pool.root_name = node0.name
    pool.save()  # ... now we have to save the pool again.

    if not path.exists("analysis"):
        os.mkdir("analysis")

def main():
    options = options_desc.parse_args(sys.argv)[0]

    if(options.common_filename):
        options.molecule = options.common_filename + ".pdb"
        options.presampling = options.common_filename + ".trr"
        options.internals = options.common_filename + ".int"
        options.grompp = options.common_filename + ".mdp"
        options.topology = options.common_filename + ".top"
        options.index = options.common_filename + ".ndx"

    print("Options:\n%s\n" % pformat(eval(str(options))))

    assert(path.exists(options.molecule))
    assert(path.exists(options.presampling))
    assert(path.exists(options.internals))
    assert(path.exists(options.grompp))
    assert(path.exists(options.topology))
    #TODO: what if there is no index-file? (make_ndx)
    assert(path.exists(options.index))
    assert('moi' in gromacs.read_index_file(options.index)), "group 'MOI' should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    temperatures = [ref_t for ref_t in re.findall("[0-9]+", mdp_options["ref_t"])]
    assert(len(set(temperatures)) == 1), "temperature definition in mdp file is ambiguous"
    temperature = temperatures[0]  # get sampling temperature from mdp file
    if(int(temperature) > 310):
        if not(userinput("Your sampling temperature is set to %s K. Continue?" % temperature, "bool")):
            sys.exit("Quit by user.")

    # options we can fix
    mdp_options_dirty = False  # if set, a new mdp-file will be written

    # the values of the following options need to be fixed
    critical_mdp_options = {"dihre": "yes", "dihre_fc": "1", "disre": "simple", "disre_fc": "1", "gen_temp": temperature}
    for (k, v) in critical_mdp_options.items():
        if(mdp_options.has_key(k) and mdp_options[k].strip() != v):
            print "Error. I do not want to use '%s' for option '%s' ('%s' required). Please fix your mdp file." % (mdp_options[k].strip(), k, v)
            sys.exit("Quitting.")
        else:
            mdp_options[k] = v
            mdp_options_dirty = True

    # the values of the following options do not matter, but they should be there
    noncritical_mdp_options = {"tcoupl": "no", "pcoupl": "no", "gen_vel": "no", "gen_seed": "-1"}
    for (k, v) in noncritical_mdp_options.items():
        if not(mdp_options.has_key(k)):
            mdp_options[k] = v
            mdp_options_dirty = True

    a = mdp_options.has_key("energygrps") and "moi" not in [str(egrp) for egrp in re.findall('[\S]+', mdp_options["energygrps"])]
    b = not(mdp_options.has_key("energygrps"))
    if(a or b):
        if not(userinput("'MOI' is not defined as an energy group in your mdp file. Maybe you have forgotten to define proper 'energygrps'. Continue?", "bool")):
            sys.exit("Quit by user.")

    a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
    if(a and not b):
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif(b and not a):
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True
    elif(b and a):
        assert(mdp_options["nstxout"] == mdp_options["nstenergy"]), "nstxout should equal nstenergy"

    if(int(mdp_options["nsteps"]) > 1e6):
        msg = "Number of MD-steps?"
        mdp_options["nsteps"] = str(userinput(msg, "int", default=int(mdp_options["nsteps"])))

    # create a fixed mdp-file
    if(mdp_options_dirty):
        print("Creating copy of mdp-file and adding missing options.")
        out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        f = open(out_fn, "w")
        f.write("; Generated by zgf_create_pool\n")
        for i in sorted(mdp_options.items()):
            f.write("%s = %s\n" % i)
        f.write("; EOF\n")
        f.close()
        options.grompp = out_fn

    # check if subsampling is reasonable
    if(os.path.getsize(options.presampling) > 100e6):  # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        dt = trr.first_frame.next().t - trr.first_frame.t
        trr.close()
        print("Presampling timestep is %.2f ps" % dt)
        if(dt < 10):  # picoseconds
            #TODO: maybe calculate subsampling factor individually, or ask?
            msg = "Subsample presampling trajectory by a tenth?"
            if(userinput(msg, "bool")):
                out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
                cmd = ["trjconv", "-f", options.presampling, "-o", out_fn, "-skip", "10"]
                check_call(cmd)
                options.presampling = out_fn

    # balance linears
    if(options.balance_linears):
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)
        new_coord_list = []
        for c in old_converter:
            if(not isinstance(c, LinearCoordinate)):
                new_coord_list.append(c)
                continue  # we do not work on other Coordinate-Types
            #TODO: is this a good way to determine new_weight and new_offset???
            new_weight = c.weight / sqrt(2 * frames.var().getcoord(c))
            new_offset = c.offset + frames.mean().getcoord(c)
            new_coord = LinearCoordinate(*c.atoms, label=c.label, weight=new_weight, offset=new_offset)
            new_coord_list.append(new_coord)
        new_converter = Converter(coord_list=new_coord_list)

        assert(old_converter.filename.endswith(".int"))
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: " + options.internals)
        f = open(options.internals, "w")
        f.write(new_converter.serialize())
        f.close()
        assert(len(Converter(options.internals)) == len(new_coord_list))  # try parsing

    # Finally: Create root-node and pool
    pool = Pool()
    if(len(pool) != 0):
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = int(temperature)
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save()  # save pool for the first time...

    # ... then we can save the first node...
    node0 = Node()
    node0.state = "refined"
    node0.save()  # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
    os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

    pool.root_name = node0.name
    pool.save()  # ... now we have to save the pool again.

    if(not path.exists("analysis")):
        os.mkdir("analysis")
