def main():
	"""Merge the per-run trr and/or edr files of all 'merge-able' nodes.

	All nodes in state 'merge-able' are locked. For each of them the run
	trajectories are concatenated with trjcat (if options.trr is set) and
	the run energy files are re-timed and concatenated with eneconv (if
	options.edr is set). The nodes are unlocked again afterwards, also if
	one of the external tools or one of the sanity checks fails.

	Returns nothing. Raises whatever check_call raises for a failing
	trjcat, and AssertionError for a failing eneconv or an unexpected
	number of edr files.
	"""
	options = options_desc.parse_args(sys.argv)[0]
	pool = Pool()

	needy_nodes = pool.where("state == 'merge-able'").multilock()

	if(len(needy_nodes) == 0):
		return

	# The nodes are locked from here on: release them no matter what happens.
	try:
		# find out about trr time step (first matching run file of the first node)
		dt = 0
		nodeDir = needy_nodes[0].dir.split('/')[-1]
		for fn in os.listdir(needy_nodes[0].dir):
			if re.match("^"+nodeDir+r".+run\d+\.trr", fn):
				trr = TrrFile(needy_nodes[0].dir+"/"+fn)
				try:
					dt = trr.first_frame.next().t - trr.first_frame.t
				finally:
					trr.close()
				break

		# dt is sometimes noisy in the final digits (three digits is femtosecond step = enough)
		dt = np.around(dt, decimals=3)

		for n in needy_nodes:

			if(options.trr):
				# merge sampling trajectories (file names are relative to n.dir, where trjcat runs)
				trr_fns = sorted([ fn for fn in os.listdir(n.dir) if re.match(r"[^#].+run\d+.trr", fn) ])
				cmd = ["trjcat", "-f"] + trr_fns + ["-o", "../../"+n.trr_fn, "-cat"]
				print("Calling: %s"%" ".join(cmd))
				check_call(cmd, cwd=n.dir)

			if(options.edr):
				# merge edr files
				# get list of edr-files
				edr_fnames = sorted([n.dir+"/"+fn for fn in os.listdir(n.dir) if re.match(r"[^#].+run\d+.edr", fn)])
				assert( len(edr_fnames) ==  n.extensions_counter+1 )
				assert( len(edr_fnames) ==  n.extensions_max+1 )

				time_offset = n.sampling_length+dt

				for edr_fn in edr_fnames[1:]:
					# adapt edr starting times (in place); the new start time is fed to eneconv on stdin
					cmd = ["eneconv", "-f", edr_fn, "-o", edr_fn, "-settime"]
					print("Calling: "+(" ".join(cmd)))
					p = Popen(cmd, stdin=PIPE)
					p.communicate(input=(str(time_offset)+"\n"))
					# keep the wait() call outside of the assert so it is not stripped under -O
					retcode = p.wait()
					assert(retcode == 0)

					time_offset += n.extensions_length+dt

				# concatenate edr files with adapted starting times
				cmd = ["eneconv", "-f"] + edr_fnames + ["-o", n.dir+"/ener.edr"]
				print("Calling: "+(" ".join(cmd)))
				p = Popen(cmd)
				retcode = p.wait()
				assert(retcode == 0)
	finally:
		needy_nodes.unlock()
Exemple #2
0
def check_output(cmd):
	"""Run a shell command, echo its output live and return the output as a string.

	cmd is a shell command line (a string); it is executed by /bin/bash and
	its stdout is piped through tee into a temporary file, which is read
	back and deleted afterwards.

	Returns the captured stdout of cmd.
	Raises whatever check_call raises if cmd exits with a non-zero status.
	"""
	banner = '#'*(len(cmd)+13)
	print("\n" + banner + "\n# Running: " + cmd + " #\n" + banner +"\n")

	# mkstemp creates the file atomically (mktemp only returns a name and is race-prone)
	fd, tmp_fn = tempfile.mkstemp()
	os.close(fd)
	try:
		# The pipe to tee hides the exit-status of cmd, but it is saved in $PIPESTATUS.
		# Debian's default shell is Dash, which does not support $PIPESTATUS
		check_call(cmd + " | tee "+tmp_fn+" ; exit $PIPESTATUS", shell=True, executable="/bin/bash")
		with open(tmp_fn) as f:
			return(f.read())
	finally:
		# remove the temporary file even if the command failed
		os.remove(tmp_fn)
Exemple #3
0
def check_output(cmd):
    """Run a shell command, echo its output live and return the output as a string.

    cmd is a shell command line (a string); it is executed by /bin/bash and
    its stdout is piped through tee into a temporary file, which is read
    back and deleted afterwards.

    Returns the captured stdout of cmd.
    Raises whatever check_call raises if cmd exits with a non-zero status.
    """
    banner = '#' * (len(cmd) + 13)
    print("\n" + banner + "\n# Running: " + cmd + " #\n" + banner + "\n")

    # mkstemp creates the file atomically (mktemp only returns a name and is race-prone)
    fd, tmp_fn = tempfile.mkstemp()
    os.close(fd)
    try:
        # The pipe to tee hides the exit-status of cmd, but it is saved in $PIPESTATUS.
        # Debian's default shell is Dash, which does not support $PIPESTATUS
        check_call(cmd + " | tee " + tmp_fn + " ; exit $PIPESTATUS",
                   shell=True,
                   executable="/bin/bash")
        with open(tmp_fn) as f:
            return f.read()
    finally:
        # remove the temporary file even if the command failed
        os.remove(tmp_fn)
Exemple #4
0
def process(node, options):
	"""Run mdrun for one node and advance the node's state.

	The mdrun command line depends on node.state: 'em-mdrun-able' writes
	em.* files, the two 'rerun-able-*' states rerun rerun_me.trr, and any
	other state is a regular run writing node.trr_fn and outfile.pdb.
	Unless options.seq is set, the command may be switched to mdrun_mpi and
	wrapped in mpiexec/aprun. The command is executed in node.dir;
	check_call raises if it fails.

	Afterwards node.state (and possibly node.extensions_counter) is
	updated in place. Nothing is returned.
	"""
	cmd1 = ["mdrun"]

	if(node.state == "em-mdrun-able"):
		cmd1 += ["-s", "../../"+node.tpr_fn]
		cmd1 += ["-c", "../../"+node.pdb_fn]
		cmd1 += ["-o", "../../"+node.dir+"/em.trr"]
		cmd1 += ["-e", "../../"+node.dir+"/em.edr"]
		cmd1 += ["-g", "../../"+node.dir+"/em.log"]
	elif(node.state in ('rerun-able-converged','rerun-able-not-converged')):
		cmd1 += ["-s", "../../"+node.dir+"/rerun_me.tpr"]
		cmd1 += ["-rerun", "../../"+node.dir+"/rerun_me.trr"]
		cmd1 += ["-o", "../../"+node.dir+"/rerun.trr"]
		cmd1 += ["-e", "../../"+node.dir+"/rerun.edr"]
		cmd1 += ["-g", "../../"+node.dir+"/rerun.log"]
	else:
		cmd1 += ["-s", "../../"+node.tpr_fn]
		cmd1 += ["-o", "../../"+node.trr_fn]
		cmd1 += ["-c", "../../"+node.dir+"/outfile.pdb"]

	cmd1 += ["-append", "-cpi", "state.cpt"] # continue previous state, if exists
	if(options.npme != -1):
		cmd1 += ["-npme", str(options.npme)]
	if(options.nt != 0):
		cmd1 += ["-nt", str(options.nt)]
	if(options.reprod):
		cmd1 += ["-reprod"]
	if(options.pd):
		cmd1 += ["-pd"]

	# do parallel job if preferred
	if(not options.seq):
		# use mdrun_mpi (and mpiexec) if available
		if(call(["which","mdrun_mpi"])==0):
			cmd1[0] = "mdrun_mpi"
			if(call(["which","mpiexec"])==0):
				cmd1 = ["mpiexec", "-np", str(options.np)] + cmd1

		# NOTE(review): if mpiexec was already prepended above, this adds a
		# second mpiexec prefix -- confirm that this is intended.
		if(str(options.pbs) == "mpiexec" and call(["which","mpiexec"])==0):
			cmd1 = ["mpiexec", "-np", str(options.np)] + cmd1

		if(str(options.pbs) == "aprun"):
			cmd1 = ["aprun", "-n", str(options.np)] + cmd1

	# Ensure that the child process dies when the parent dies.
	# Alternative: write own signal-handler, e.g. for the atexit-module.
	# http://stackoverflow.com/questions/1884941/killing-the-child-processes-with-the-parent-process
	# http://stackoverflow.com/questions/4554767/terminating-subprocess-in-python
	implant_bomb = None
	try:
		import ctypes
		libc = ctypes.CDLL('libc.so.6')
		PR_SET_PDEATHSIG = 1; TERM = 15
		implant_bomb = lambda: libc.prctl(PR_SET_PDEATHSIG, TERM)
	except Exception:
		# best effort only; do not swallow KeyboardInterrupt/SystemExit
		warn("Child process might live on when parent gets terminated (feature requires python 2.6).")

	print("Calling: %s"%" ".join(cmd1))
	check_call(cmd1, cwd=node.dir, preexec_fn=implant_bomb)

	# if we were just minimizing, we go back to grompp-able now
	if(node.state == "em-mdrun-able"):
		node.state = "grompp-able"
		return

	# if we were just rerunning, we go back to original state now
	if(node.state in ('rerun-able-converged','rerun-able-not-converged')):
		node.state = node.state.rsplit("rerun-able-", 1)[1]
		return

	if(node.has_restraints and not options.multistart):
		# check for convergence
		converged = conv_check_gelman_rubin(node)
	else:
		# stow away sampling data
		converged = False
		os.remove(node.dir+"/state.cpt")
		for fn in [node.dir+"/outfile.pdb",node.trr_fn, node.dir+"/ener.edr", node.dir+"/md.log"]:
			archive_file(fn, node.extensions_counter)

	# check if user wants to delete files except pdb
	# (a node without a save_mode attribute is silently skipped)
	try:
		if (node.save_mode == "pdb"):
			# delete all files except pdb and start files
			keep_patterns = (".+.pdb", "[^#].+.mdp", ".+.txt", "[^#].+.tpr", ".+.top")
			for fn in os.listdir(node.dir):
				if(fn != "lock" and not any(re.match(p, fn) is not None for p in keep_patterns)):
					os.remove(node.dir+"/"+str(fn))
	except AttributeError:
		pass

	# decide what to do next
	if(converged):
		node.state = "converged"
	elif(node.extensions_counter >= node.extensions_max):
		if(node.has_restraints and not options.multistart):
			node.state = "not-converged"
		else:
			# if user wants to keep everything, we merge trajectory and edr files
			# and delete backups
			# NOTE(review): an AttributeError raised anywhere in this block is
			# swallowed, not only a missing save_mode -- confirm this is wanted.
			try:
				if (node.save_mode == "complete"):
					# merge sampling trajectories
					trr_fns = sorted([ fn for fn in os.listdir(node.dir) if re.match(r"[^#].+run\d+.trr", fn) ])
					cmd2 = ["trjcat", "-f"] + trr_fns + ["-o", "../../"+node.trr_fn, "-cat"]
					print("Calling: %s"%" ".join(cmd2))
					check_call(cmd2, cwd=node.dir)
					# merge edr files
					get_merged_edr(node)
					# delete backups, assuming each backup file starts with '#'
					for fn in os.listdir(node.dir):
						if(re.match("#.+",fn)):
							os.remove(node.dir+"/"+str(fn))
			except AttributeError:
				pass

			# in either case, save as ready node
			node.state = "ready"

	else:
		node.extensions_counter += 1
		node.state = "mdrun-able" # actually it should still be in this state

		if(node.has_restraints and not options.multistart):
			cmd0 = ["tpbconv", "-s", node.tpr_fn, "-o", node.tpr_fn, "-extend", str(node.extensions_length)]
			print("Calling: %s"%" ".join(cmd0))
			check_call(cmd0) # tell Gromacs to extend the tpr file for another round
		else:
			node.state = "grompp-able"
			zgf_grompp.call_grompp(node) # re-grompp to obtain new random impulse
def main():
    """Check the user-supplied input files and create a new pool with its root node.

    In order: resolve the input file names, verify that they exist, check
    the mdp options (writing a "_fixed.mdp" copy if options had to be
    added), optionally subsample a large presampling trajectory,
    optionally write a balanced internals file, then create the pool, the
    root node with symlinks to the presampling data, and the "analysis"
    directory.

    Exits with status 1 if a pool already exists; failed checks raise
    AssertionError.
    """
    options = options_desc.parse_args(sys.argv)[0]

    if options.common_filename:
        options.molecule = options.common_filename + ".pdb"
        options.presampling = options.common_filename + ".trr"
        options.internals = options.common_filename + ".int"
        options.grompp = options.common_filename + ".mdp"
        options.topology = options.common_filename + ".top"
        options.index = options.common_filename + ".ndx"

    # NOTE(review): eval() on the string form of the parsed options is only
    # used for pretty-printing; consider a form that does not need eval.
    print("Options:\n%s\n" % pformat(eval(str(options))))

    assert path.exists(options.molecule)
    assert path.exists(options.presampling)
    assert path.exists(options.internals)
    assert path.exists(options.grompp)
    assert path.exists(options.topology)

    # TODO: what if there is no index-file? (make_ndx)
    assert path.exists(options.index)
    assert "MOI" in gromacs.read_index_file(options.index), "group MOI should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    # options we cannot fix
    for ref_t in re.findall("[0-9]+", mdp_options["ref_t"]):
        assert int(ref_t) == options.temperature, "temperature in mdp file does not match ZIBgridfree temperature"
        # TODO drop options.temperature and get temperature directly from mdp file... ask again if temperature is above 310K

    # options we can fix
    mdp_options_dirty = False  # if set, a new mdp-file will be written
    required_mdp_options = {"dihre": "yes", "dihre_fc": "1", "disre": "simple", "disre_fc": "1"}
    for (k, v) in required_mdp_options.items():
        if mdp_options.has_key(k):
            assert mdp_options[k] == v  # check, if we would overwrite something
        else:
            mdp_options[k] = v
            mdp_options_dirty = True

    if mdp_options.has_key("energygrps"):
        assert "MOI" in [
            str(egrp) for egrp in re.findall(r"[\S]+", mdp_options["energygrps"])
        ], "group MOI should be among energygrps in mdp file"
    else:
        mdp_options["energygrps"] = "MOI"
        mdp_options_dirty = True

    # nstxout and nstenergy have to agree; a missing one is copied from the other
    a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
    if a and not b:
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif b and not a:
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True
    elif b and a:
        assert mdp_options["nstxout"] == mdp_options["nstenergy"], "nstxout should equal nstenergy"

    # NOTE(review): a changed nsteps does not set mdp_options_dirty, so it is
    # only written out if another option was added -- confirm.
    if int(mdp_options["nsteps"]) > 1e6:
        msg = "Number of MD-steps?"
        mdp_options["nsteps"] = str(userinput(msg, "int", default=int(mdp_options["nsteps"])))

    # create a fixed mdp-file
    if mdp_options_dirty:
        print("Creating copy of mdp-file and adding missing options.")
        out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        # mode "w": the copy is written from scratch (an existing file is truncated)
        with open(out_fn, "w") as f:
            f.write("; Generated by zgf_create_pool\n")
            for i in mdp_options.items():
                f.write("%s = %s\n" % i)
            f.write("; EOF\n")
        options.grompp = out_fn

    # check if subsampling is reasonable
    if os.path.getsize(options.presampling) > 100e6:  # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        try:
            dt = trr.first_frame.next().t - trr.first_frame.t
        finally:
            trr.close()
        print("Presampling timestep is %.2f ps" % dt)
        if dt < 10:  # picoseconds
            # TODO: maybe calculate subsampling factor individually, or ask?
            msg = "Subsample presampling trajectory by a tenth?"
            if userinput(msg, "bool"):
                out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
                cmd = ["trjconv", "-f", options.presampling, "-o", out_fn, "-skip", "10"]
                check_call(cmd)
                options.presampling = out_fn

    # balance linears
    if options.balance_linears:
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)
        new_coord_list = []
        for c in old_converter:
            if not isinstance(c, LinearCoordinate):
                new_coord_list.append(c)
                continue  # we do not work on other Coordinate-Types
            # TODO: is this a good way to determine new_weight and new_offset???
            new_weight = c.weight / sqrt(2 * frames.var().getcoord(c))
            new_offset = c.offset + frames.mean().getcoord(c)
            new_coord = LinearCoordinate(*c.atoms, label=c.label, weight=new_weight, offset=new_offset)
            new_coord_list.append(new_coord)
        new_converter = Converter(coord_list=new_coord_list)

        assert old_converter.filename.endswith(".int")
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: " + options.internals)
        with open(options.internals, "w") as f:
            f.write(new_converter.serialize())
        assert len(Converter(options.internals)) == len(new_coord_list)  # try parsing

    # Finally: Create root-node and pool
    pool = Pool()
    if len(pool) != 0:
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = options.temperature
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save()  # save pool for the first time...

    # ... then we can save the first node...
    node0 = Node()
    node0.state = "refined"
    node0.save()  # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
    os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

    pool.root_name = node0.name
    pool.save()  # ... now we have to save the pool again.

    if not path.exists("analysis"):
        os.mkdir("analysis")
def main():
	"""Check the user-supplied input files and create a new pool with its root node.

	In order: resolve the input file names, verify that they exist, read
	the sampling temperature from the mdp file, enforce/add the required
	mdp options and write a "_fixed.mdp" copy, optionally subsample a large
	presampling trajectory, optionally write a balanced internals file,
	then create the pool, the root node with symlinks to the presampling
	data, and the "analysis" directory.

	Exits via sys.exit if the user declines a question, if a critical mdp
	option has a conflicting value, or if a pool already exists; failed
	checks raise AssertionError.
	"""
	options = options_desc.parse_args(sys.argv)[0]

	if(options.common_filename):
		options.molecule = options.common_filename+".pdb"
		options.presampling = options.common_filename+".trr"
		options.internals = options.common_filename+".int"
		options.grompp = options.common_filename+".mdp"
		options.topology = options.common_filename+".top"
		options.index = options.common_filename+".ndx"

	# NOTE(review): eval() on the string form of the parsed options is only
	# used for pretty-printing; consider a form that does not need eval.
	print("Options:\n%s\n"%pformat(eval(str(options))))

	assert(path.exists(options.molecule))
	assert(path.exists(options.presampling))
	assert(path.exists(options.internals))
	assert(path.exists(options.grompp))
	assert(path.exists(options.topology))

	#TODO: what if there is no index-file? (make_ndx)
	assert(path.exists(options.index))
	assert('moi' in gromacs.read_index_file(options.index)), "group 'MOI' should be defined in index file"

	# checks e.g. if the mdp-file looks good
	mdp_options = gromacs.read_mdp_file(options.grompp)

	# get sampling temperature from mdp file; all numbers found in ref_t have to agree
	temperatures = re.findall("[0-9]+", mdp_options["ref_t"])
	assert(len(set(temperatures)) == 1), "temperature definition in mdp file is ambiguous"
	temperature = temperatures[0]

	if(int(temperature) > 310):
		if not(userinput("Your sampling temperature is set to %s K. Continue?"%temperature, "bool")):
			sys.exit("Quit by user.")

	# options we can fix
	mdp_options_dirty = False #if set, a new mdp-file will be written

	# the value of the following options need to be fixed
	# (every pass through this loop either exits or sets the dirty flag,
	# so the "_fixed" copy below is always written)
	critical_mdp_options = {"dihre":"yes", "dihre_fc":"1", "disre":"simple", "disre_fc":"1", "gen_temp":temperature}
	for (k,v) in critical_mdp_options.items():
		if(mdp_options.has_key(k) and mdp_options[k].strip() != v):
			print("Error. I do not want to use '%s' for option '%s' ('%s' required). Please fix your mdp file."%(mdp_options[k].strip(),k,v))
			sys.exit("Quitting.")
		else:
			mdp_options[k] = v
			mdp_options_dirty = True

	# the value of the following options does not matter, but they should be there
	noncritical_mdp_options = {"tcoupl":"no", "pcoupl":"no", "gen_vel":"no", "gen_seed":"-1"}
	for (k,v) in noncritical_mdp_options.items():
		if not(mdp_options.has_key(k)):
			mdp_options[k] = v
			mdp_options_dirty = True

	# ask the user if 'moi' is not among the energy groups (or there are none at all)
	a = mdp_options.has_key("energygrps") and "moi" not in [str(egrp) for egrp in re.findall(r'[\S]+', mdp_options["energygrps"])]
	b = not(mdp_options.has_key("energygrps"))
	if(a or b):
		if not(userinput("'MOI' is not defined as an energy group in your mdp file. Maybe you have forgotten to define proper 'energygrps'. Continue?", "bool")):
			sys.exit("Quit by user.")

	# nstxout and nstenergy have to agree; a missing one is copied from the other
	a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
	if(a and not b):
		mdp_options["nstenergy"] = mdp_options["nstxout"]
		mdp_options_dirty = True
	elif(b and not a):
		mdp_options["nstxout"] = mdp_options["nstenergy"]
		mdp_options_dirty = True
	elif(b and a):
		assert(mdp_options["nstxout"] == mdp_options["nstenergy"]), "nstxout should equal nstenergy"

	if(int(mdp_options["nsteps"]) > 1e6):
		msg = "Number of MD-steps?"
		mdp_options["nsteps"] = str( userinput(msg, "int", default=int(mdp_options["nsteps"])) )

	# create a fixed mdp-file
	if(mdp_options_dirty):
		print("Creating copy of mdp-file and adding missing options.")
		out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
		# mode "w": the copy is written from scratch (an existing file is truncated)
		with open(out_fn, "w") as f:
			f.write("; Generated by zgf_create_pool\n")
			for i in sorted(mdp_options.items()):
				f.write("%s = %s\n"%i)
			f.write("; EOF\n")
		options.grompp = out_fn

	# check if subsampling is reasonable
	if(os.path.getsize(options.presampling) > 100e6): # 100MB
		print("Presampling trajectory is large")
		trr = TrrFile(options.presampling)
		try:
			dt = trr.first_frame.next().t - trr.first_frame.t
		finally:
			trr.close()
		print("Presampling timestep is %.2f ps"%dt)
		if(dt < 10): # picoseconds
			#TODO: maybe calculate subsampling factor individually, or ask?
			msg = "Subsample presampling trajectory by a tenth?"
			if(userinput(msg, "bool")):
				out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
				cmd = ["trjconv", "-f", options.presampling, "-o", out_fn, "-skip", "10"]
				check_call(cmd)
				options.presampling = out_fn

	# balance linears
	if(options.balance_linears):
		print("Balance Linears")
		old_converter = Converter(options.internals)
		print("Loading presampling....")
		frames = old_converter.read_trajectory(options.presampling)
		new_coord_list = []
		for c in old_converter:
			if(not isinstance(c, LinearCoordinate)):
				new_coord_list.append(c)
				continue # we do not work on other Coordinate-Types
			#TODO: is this a good way to determine new_weight and new_offset???
			new_weight = c.weight / sqrt(2*frames.var().getcoord(c))
			new_offset = c.offset + frames.mean().getcoord(c)
			new_coord = LinearCoordinate(*c.atoms, label=c.label, weight=new_weight, offset=new_offset)
			new_coord_list.append(new_coord)
		new_converter = Converter(coord_list=new_coord_list)

		assert(old_converter.filename.endswith(".int"))
		options.internals = old_converter.filename[:-4] + "_balanced.int"
		print("Writing balanced Converter to: "+options.internals)
		with open(options.internals, "w") as f:
			f.write(new_converter.serialize())
		assert(len(Converter(options.internals)) == len(new_coord_list)) #try parsing

	# Finally: Create root-node and pool
	pool = Pool()
	if(len(pool) != 0):
		print("ERROR: A pool already exists here.")
		sys.exit(1)

	pool.int_fn = options.internals
	pool.mdp_fn = options.grompp
	pool.top_fn = options.topology
	pool.ndx_fn = options.index
	pool.temperature = int(temperature)
	pool.gr_threshold = options.gr_threshold
	pool.gr_chains = options.gr_chains
	pool.alpha = None
	pool.save() # save pool for the first time...

	# ... then we can save the first node...
	node0 = Node()
	node0.state = "refined"
	node0.save() # also creates the node directory ... needed for symlink
	os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
	os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

	pool.root_name = node0.name
	pool.save() #... now we have to save the pool again.

	if(not path.exists("analysis")):
		os.mkdir("analysis")
Exemple #7
0
def process(node, options):
	"""Run mdrun for one node and advance the node's state.

	If the node has already been extended (extensions_counter > 0), its tpr
	file is first extended by extensions_length via tpbconv. mdrun is then
	started in node.dir, as an energy minimization for state
	'em-mdrun-able' and as a regular run otherwise. Unless options.seq is
	set and if mpiexec is available, the command is wrapped in mpiexec
	(using mdrun_mpi if that is available too). check_call raises if a
	tool fails.

	Afterwards node.state (and possibly node.extensions_counter) is
	updated in place. Nothing is returned.
	"""
	if(node.extensions_counter > 0):
		# extend the tpr file in place for another round
		cmd0 = ["tpbconv", "-s", node.tpr_fn, "-o", node.tpr_fn, "-extend", str(node.extensions_length)]
		print("Calling: %s"%" ".join(cmd0))
		check_call(cmd0)

	# paths are relative to node.dir, where mdrun is executed
	cmd1 = ["mdrun", "-s", "../../"+node.tpr_fn]

	if(node.state == "em-mdrun-able"):
		cmd1 += ["-c", "../../"+node.pdb_fn]
		cmd1 += ["-o", "../../"+node.dir+"/em.trr"]
		cmd1 += ["-e", "../../"+node.dir+"/em.edr"]
		cmd1 += ["-g", "../../"+node.dir+"/em.log"]
	else:
		cmd1 += ["-o", "../../"+node.trr_fn]

	cmd1 += ["-append", "-cpi", "state.cpt"] # continue previous state, if exists
	if(options.npme != -1):
		cmd1 += ["-npme", str(options.npme)]
	if(options.nt != 0):
		cmd1 += ["-nt", str(options.nt)]
	if(options.reprod):
		cmd1 += ["-reprod"]
	if(options.pd):
		cmd1 += ["-pd"]

	# use mpiexec and mdrun_mpi if available
	if(not options.seq and call(["which","mpiexec"])==0):
		if(call(["which","mdrun_mpi"])==0):
			cmd1[0] = "mdrun_mpi"
		cmd1 = ["mpiexec", "-np", str(options.np)] + cmd1

	# Ensure that the child process dies when the parent dies.
	# Alternative: write own signal-handler, e.g. for the atexit-module.
	# http://stackoverflow.com/questions/1884941/killing-the-child-processes-with-the-parent-process
	# http://stackoverflow.com/questions/4554767/terminating-subprocess-in-python
	implant_bomb = None
	try:
		import ctypes
		libc = ctypes.CDLL('libc.so.6')
		PR_SET_PDEATHSIG = 1; TERM = 15
		implant_bomb = lambda: libc.prctl(PR_SET_PDEATHSIG, TERM)
	except Exception:
		# best effort only; do not swallow KeyboardInterrupt/SystemExit
		warn("Child process might live on when parent gets terminated (feature requires python 2.6).")

	print("Calling: %s"%" ".join(cmd1))
	check_call(cmd1, cwd=node.dir, preexec_fn=implant_bomb)

	# if we were just minimizing, we go back to grompp-able now
	if(node.state == "em-mdrun-able"):
		node.state = "grompp-able"
		return

	# check for convergence
	converged = conv_check_gelman_rubin(node)

	# decide what to do next
	if(converged):
		node.state = "converged"

	elif(node.extensions_counter >= node.extensions_max):
		node.state = "not-converged"

	else:
		node.extensions_counter += 1
		node.state = "mdrun-able" # actually it should still be in this state
def main():
    """Check the user-supplied input files and create a new pool with its root node.

    In order: resolve the input file names, verify that they exist, read
    the sampling temperature from the mdp file, enforce/add the required
    mdp options and write a "_fixed.mdp" copy, optionally subsample a large
    presampling trajectory, optionally write a balanced internals file,
    then create the pool, the root node with symlinks to the presampling
    data, and the "analysis" directory.

    Exits via sys.exit if the user declines a question, if a critical mdp
    option has a conflicting value, or if a pool already exists; failed
    checks raise AssertionError.
    """
    options = options_desc.parse_args(sys.argv)[0]

    if (options.common_filename):
        options.molecule = options.common_filename + ".pdb"
        options.presampling = options.common_filename + ".trr"
        options.internals = options.common_filename + ".int"
        options.grompp = options.common_filename + ".mdp"
        options.topology = options.common_filename + ".top"
        options.index = options.common_filename + ".ndx"

    # NOTE(review): eval() on the string form of the parsed options is only
    # used for pretty-printing; consider a form that does not need eval.
    print("Options:\n%s\n" % pformat(eval(str(options))))

    assert (path.exists(options.molecule))
    assert (path.exists(options.presampling))
    assert (path.exists(options.internals))
    assert (path.exists(options.grompp))
    assert (path.exists(options.topology))

    #TODO: what if there is no index-file? (make_ndx)
    assert (path.exists(options.index))
    assert ('moi' in gromacs.read_index_file(
        options.index)), "group 'MOI' should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    # get sampling temperature from mdp file; all numbers found in ref_t have to agree
    temperatures = re.findall("[0-9]+", mdp_options["ref_t"])
    assert (len(set(temperatures)) == 1
            ), "temperature definition in mdp file is ambiguous"
    temperature = temperatures[0]

    if (int(temperature) > 310):
        if not (userinput(
                "Your sampling temperature is set to %s K. Continue?" %
                temperature, "bool")):
            sys.exit("Quit by user.")

    # options we can fix
    mdp_options_dirty = False  #if set, a new mdp-file will be written

    # the value of the following options need to be fixed
    # (every pass through this loop either exits or sets the dirty flag,
    # so the "_fixed" copy below is always written)
    critical_mdp_options = {
        "dihre": "yes",
        "dihre_fc": "1",
        "disre": "simple",
        "disre_fc": "1",
        "gen_temp": temperature
    }
    for (k, v) in critical_mdp_options.items():
        if (mdp_options.has_key(k) and mdp_options[k].strip() != v):
            print("Error. I do not want to use '%s' for option '%s' ('%s' required). Please fix your mdp file." % (
                mdp_options[k].strip(), k, v))
            sys.exit("Quitting.")
        else:
            mdp_options[k] = v
            mdp_options_dirty = True

    # the value of the following options does not matter, but they should be there
    noncritical_mdp_options = {
        "tcoupl": "no",
        "pcoupl": "no",
        "gen_vel": "no",
        "gen_seed": "-1"
    }
    for (k, v) in noncritical_mdp_options.items():
        if not (mdp_options.has_key(k)):
            mdp_options[k] = v
            mdp_options_dirty = True

    # ask the user if 'moi' is not among the energy groups (or there are none at all)
    a = mdp_options.has_key("energygrps") and "moi" not in [
        str(egrp) for egrp in re.findall(r'[\S]+', mdp_options["energygrps"])
    ]
    b = not (mdp_options.has_key("energygrps"))
    if (a or b):
        if not (userinput(
                "'MOI' is not defined as an energy group in your mdp file. Maybe you have forgotten to define proper 'energygrps'. Continue?",
                "bool")):
            sys.exit("Quit by user.")

    # nstxout and nstenergy have to agree; a missing one is copied from the other
    a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
    if (a and not b):
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif (b and not a):
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True
    elif (b and a):
        assert (mdp_options["nstxout"] == mdp_options["nstenergy"]
                ), "nstxout should equal nstenergy"

    if (int(mdp_options["nsteps"]) > 1e6):
        msg = "Number of MD-steps?"
        mdp_options["nsteps"] = str(
            userinput(msg, "int", default=int(mdp_options["nsteps"])))

    # create a fixed mdp-file
    if (mdp_options_dirty):
        print("Creating copy of mdp-file and adding missing options.")
        out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        # mode "w": the copy is written from scratch (an existing file is truncated)
        with open(out_fn, "w") as f:
            f.write("; Generated by zgf_create_pool\n")
            for i in sorted(mdp_options.items()):
                f.write("%s = %s\n" % i)
            f.write("; EOF\n")
        options.grompp = out_fn

    # check if subsampling is reasonable
    if (os.path.getsize(options.presampling) > 100e6):  # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        try:
            dt = trr.first_frame.next().t - trr.first_frame.t
        finally:
            trr.close()
        print("Presampling timestep is %.2f ps" % dt)
        if (dt < 10):  # picoseconds
            #TODO: maybe calculate subsampling factor individually, or ask?
            msg = "Subsample presampling trajectory by a tenth?"
            if (userinput(msg, "bool")):
                out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
                cmd = [
                    "trjconv", "-f", options.presampling, "-o", out_fn,
                    "-skip", "10"
                ]
                check_call(cmd)
                options.presampling = out_fn

    # balance linears
    if (options.balance_linears):
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)
        new_coord_list = []
        for c in old_converter:
            if (not isinstance(c, LinearCoordinate)):
                new_coord_list.append(c)
                continue  # we do not work on other Coordinate-Types
            #TODO: is this a good way to determine new_weight and new_offset???
            new_weight = c.weight / sqrt(2 * frames.var().getcoord(c))
            new_offset = c.offset + frames.mean().getcoord(c)
            new_coord = LinearCoordinate(*c.atoms,
                                         label=c.label,
                                         weight=new_weight,
                                         offset=new_offset)
            new_coord_list.append(new_coord)
        new_converter = Converter(coord_list=new_coord_list)

        assert (old_converter.filename.endswith(".int"))
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: " + options.internals)
        with open(options.internals, "w") as f:
            f.write(new_converter.serialize())
        assert (len(Converter(options.internals)) == len(new_coord_list)
                )  #try parsing

    # Finally: Create root-node and pool
    pool = Pool()
    if (len(pool) != 0):
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = int(temperature)
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save()  # save pool for the first time...

    # ... then we can save the first node...
    node0 = Node()
    node0.state = "refined"
    node0.save()  # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
    os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

    pool.root_name = node0.name
    pool.save()  #... now we have to save the pool again.

    if (not path.exists("analysis")):
        os.mkdir("analysis")
def main():
    """Merge the per-run trr and/or edr files of all 'merge-able' nodes.

    All nodes in state 'merge-able' are locked. For each of them the run
    trajectories are concatenated with trjcat (if options.trr is set) and
    the run energy files are re-timed and concatenated with eneconv (if
    options.edr is set). The nodes are unlocked again afterwards, also if
    one of the external tools or one of the sanity checks fails.

    Returns nothing. Raises whatever check_call raises for a failing
    trjcat, and AssertionError for a failing eneconv or an unexpected
    number of edr files.
    """
    options = options_desc.parse_args(sys.argv)[0]
    pool = Pool()

    needy_nodes = pool.where("state == 'merge-able'").multilock()

    if (len(needy_nodes) == 0):
        return

    # The nodes are locked from here on: release them no matter what happens.
    try:
        # find out about trr time step (first matching run file of the first node)
        dt = 0
        nodeDir = needy_nodes[0].dir.split('/')[-1]
        for fn in os.listdir(needy_nodes[0].dir):
            if re.match("^" + nodeDir + r".+run\d+\.trr", fn):
                trr = TrrFile(needy_nodes[0].dir + "/" + fn)
                try:
                    dt = trr.first_frame.next().t - trr.first_frame.t
                finally:
                    trr.close()
                break

        # dt is sometimes noisy in the final digits (three digits is femtosecond step = enough)
        dt = np.around(dt, decimals=3)

        for n in needy_nodes:

            if (options.trr):
                # merge sampling trajectories (file names are relative to n.dir, where trjcat runs)
                trr_fns = sorted([
                    fn for fn in os.listdir(n.dir)
                    if re.match(r"[^#].+run\d+.trr", fn)
                ])
                cmd = ["trjcat", "-f"] + trr_fns + ["-o", "../../" + n.trr_fn, "-cat"]
                print("Calling: %s" % " ".join(cmd))
                check_call(cmd, cwd=n.dir)

            if (options.edr):
                # merge edr files
                # get list of edr-files
                edr_fnames = sorted([
                    n.dir + "/" + fn for fn in os.listdir(n.dir)
                    if re.match(r"[^#].+run\d+.edr", fn)
                ])
                assert (len(edr_fnames) == n.extensions_counter + 1)
                assert (len(edr_fnames) == n.extensions_max + 1)

                time_offset = n.sampling_length + dt

                for edr_fn in edr_fnames[1:]:
                    # adapt edr starting times (in place); the new start time is fed to eneconv on stdin
                    cmd = ["eneconv", "-f", edr_fn, "-o", edr_fn, "-settime"]
                    print("Calling: " + (" ".join(cmd)))
                    p = Popen(cmd, stdin=PIPE)
                    p.communicate(input=(str(time_offset) + "\n"))
                    # keep the wait() call outside of the assert so it is not stripped under -O
                    retcode = p.wait()
                    assert (retcode == 0)

                    time_offset += n.extensions_length + dt

                # concatenate edr files with adapted starting times
                cmd = ["eneconv", "-f"] + edr_fnames + ["-o", n.dir + "/ener.edr"]
                print("Calling: " + (" ".join(cmd)))
                p = Popen(cmd)
                retcode = p.wait()
                assert (retcode == 0)
    finally:
        needy_nodes.unlock()