Example #1
0
def main():
	options = options_desc.parse_args(sys.argv)[0]
	
	#TODO put somehow into Options, e.g. min_value=1 or required=True
	if(not options.doomed_nodes):
		sys.exit("Option --doomed_nodes is required.")
		
	pool = Pool()
	old_pool_size = len(pool)
	old_alpha = pool.alpha

	doomed_nodes = NodeList()
	
	#TODO: maybe this code should go into ZIBMolPy.ui 
	for name in options.doomed_nodes.split(","):
		found = [n for n in pool if n.name == name]
		if(len(found) != 1):
			sys.exit("Coult not find node '%s'"%(name))
		doomed_nodes.append(found[0])
	
	for n in doomed_nodes:
		if(n == pool.root):
			sys.exit("Node %s is the root. Removal not allowed."%(n.name))		
		#if(len(n.children) > 0):
		#	sys.exit("Node %s has children. Removal not allowed."%(n.name)) #TODO why should we forbid this?

	if not(userinput("The selected node(s) will be removed permanently. Continue?", "bool")):
		sys.exit("Quit by user.")

	assert(len(doomed_nodes) == len(doomed_nodes.multilock()))
	for n in doomed_nodes:
		print("Removing directory: "+n.dir)
		shutil.rmtree(n.dir)

	pool.reload_nodes()
	
	#TODO: this code-block also exists in zgf_create_node
	if(len(pool.where("isa_partition")) < 2):
		pool.alpha = None
	elif(options.methodalphas == "theta"):
		pool.alpha = zgf_create_nodes.calc_alpha_theta(pool)
	elif(options.methodalphas == "user"):
		pool.alpha = userinput("Please enter a value for alpha", "float")
	else:
		raise(Exception("Method unkown: "+options.methodalphas))

	pool.history.append({'removed_nodes': [(n.name, n.state) for n in doomed_nodes], 'size':old_pool_size, 'alpha':old_alpha, 'timestamp':datetime.now()})
	pool.save()

	#TODO: deal with analysis dir and dependencies
	zgf_cleanup.main()	
def main():
	options = options_desc.parse_args(sys.argv)[0]
	
	pool = Pool()
	
	choice = "state in ('converged', 'refined')"
	if(options.ignore_convergence):
		choice = "state in ('converged', 'not-converged', 'refined')"	

	needy_nodes = NodeList([n for n in pool.where(choice) if not n == pool.root]) # we won't touch the root

	if not(len(needy_nodes)):
		sys.exit("Nothing to do.")

	if not(userinput("Once the solvent has been removed, further refinement of the pool is not possible. This includes the generation of unrestrained transition nodes! Continue?", "bool")):
		sys.exit("Quit by user.")
		
	assert(len(needy_nodes) == len(needy_nodes.multilock())) # make sure we lock ALL nodes

	try:
		for n in needy_nodes:	
			discard_solvent(n, "pdb")
			discard_solvent(n, "trr")

		for n in needy_nodes:
			n.unlock()
	except:
		traceback.print_exc()
Example #4
0
def main(argv=None):
	if(argv==None):
		argv = sys.argv
	options = options_desc.parse_args(argv)[0]

	assert(not(options.refine_all and options.extend_all)) 
	
	pool = Pool()
	needy_nodes = pool.where("isa_partition and is_sampled").multilock()
	
	# 1. Trying to detect fake convergence
	for n in pool.where("state == 'converged'"):
		means = kmeans(n.trajectory, k=2)
		d = (means[0] - means[1]).norm2()
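		# a large distance between the two k-means centers hints at a bimodal, i.e. fake-converged, distribution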
		if(d > 2.0 and (options.refine_all or userinput("%s has converged but appears to have a bimodal distribution.\nDo you want to refine?"%n.name, "bool"))): #TODO decide upon threshold (per coordinate?)
			refine(n, options)
	
	# 2. Dealing with not-converged nodes
	for n in pool.where("state == 'not-converged'"):
		if(not(options.refine_all or options.extend_all)):
			choice = userchoice("%s has not converged. What do you want to do?"%n.name, ['_refine', '_extend', '_ignore'])
		if(options.refine_all or choice=="r"):
			refine(n, options)
		elif(options.extend_all or choice=="e"):
			extend(n)
		elif(choice=="i"):
			continue
	
	for n in needy_nodes:
		n.save()
		n.unlock()
			
	zgf_setup_nodes.main()
	zgf_grompp.main()
	zgf_cleanup.main()	
def main():
    (options, args) = options_desc.parse_args(sys.argv)
    assert (path.exists(options.input_topology))
    print("Preprocessing (only local includes): %s ..." %
          options.input_topology)
    rawdata = preprocess(options.input_topology,
                         includedirs=[])  #only local includes

    print("\nParsing...")
    top = Topology(rawdata)
    print("The topology contains:")

    for m in top.molecules:
        print("  %d molecule(s) of the moleculetype '%s'" % (m.mols, m.name))

    #find candidates for molecules to merge
    candidates = []
    for i in range(len(top.molecules) - 1):
        #check this and the next molecule
        is_candidate = True
        for m in top.molecules[i:i + 2]:
            is_candidate &= (m.mols == 1)
            uses = [n for n in top.molecules if n.name == m.name]
            is_candidate &= (len(uses) == 1)
        if (is_candidate):
            candidates.append(i)

    #pick a candidate
    if (len(candidates) == 0):
        print("Topology contains no mergable moleculetypes - abort.")
        sys.exit(1)

    elif (len(candidates) == 1):
        mt_index1 = candidates[0]
        print("Topology contains only one mergable pair of moleculetypes.")

    else:
        msg = "Only two consecutively molecultypes with mol=1 can be merged.\n"
        msg += "Choose index of first moleculetype.\n"
        for i in candidates:
            msg += "%d: %s\n" % (i, top.molecules[i].name)
        mt_index1 = userinput(msg, "int", condition="x in " + repr(candidates))

    #print chosen moleculetypes
    mt_name1 = top.molecules[mt_index1].name
    mt_name2 = top.molecules[mt_index1 + 1].name
    print("Merging moleculetype '%s' with '%s'." % (mt_name1, mt_name2))
    merge_moleculetypes(top, mt_name1, mt_name2)

    print("")
    print("The merged topology contains:")
    for m in top.molecules:
        print("  %d molecule(s) of the moleculetype '%s'" % (m.mols, m.name))

    top_out_fn = options.output_topology
    print("Writting merged topology to " + top_out_fn)
    f = open(top_out_fn, "w")
    f.write(top.write())
    f.close()
    print("DONE")
Example #6
0
def main():
    options = options_desc.parse_args(sys.argv)[0]

    pool = Pool()

    choice = "state in ('converged', 'refined')"
    if (options.ignore_convergence):
        choice = "state in ('converged', 'not-converged', 'refined')"

    needy_nodes = NodeList([
        n for n in pool.where(choice) if not n == pool.root
    ])  # we won't touch the root

    if not (len(needy_nodes)):
        sys.exit("Nothing to do.")

    if not (userinput(
            "Once the solvent has been removed, further refinement of the pool is not possible. This includes the generation of unrestrained transition nodes! Continue?",
            "bool")):
        sys.exit("Quit by user.")

    assert (len(needy_nodes) == len(needy_nodes.multilock())
            )  # make sure we lock ALL nodes

    try:
        for n in needy_nodes:
            discard_solvent(n, "pdb")
            discard_solvent(n, "trr")

        for n in needy_nodes:
            n.unlock()
    except:
        traceback.print_exc()
def main():
	(options, args) = options_desc.parse_args(sys.argv)
	assert(path.exists(options.input_topology))
	print("Preprocessing (only local includes): %s ..."%options.input_topology)
	rawdata = preprocess(options.input_topology, includedirs=[]) #only local includes
	
	print("\nParsing...")
	top = Topology(rawdata)
	print("The topology contains:")
	
	for m in top.molecules:
		print("  %d molecule(s) of the moleculetype '%s'"%(m.mols, m.name))
		
	#find candidates for molecules to merge 
	candidates = []
	for i in range(len(top.molecules)-1):
		#check this and the next molecule
		is_candidate = True
		for m in top.molecules[i:i+2]:
			is_candidate &= (m.mols==1)
			uses = [n for n in top.molecules if n.name == m.name]
			is_candidate &= (len(uses)==1)
		if(is_candidate):
			candidates.append(i)
	
	#pick a candidate
	if(len(candidates) == 0):
		print("Topology contains no mergable moleculetypes - abort.")
		sys.exit(1)
		
	elif(len(candidates) == 1):
		mt_index1 = candidates[0]
		print("Topology contains only one mergable pair of moleculetypes.")
		
	else:
		msg = "Only two consecutively molecultypes with mol=1 can be merged.\n"
		msg += "Choose index of first moleculetype.\n"
		for i in candidates:
			msg += "%d: %s\n"%(i, top.molecules[i].name)
		mt_index1 = userinput(msg, "int", condition="x in "+repr(candidates))
	
	#print chosen moleculetypes
	mt_name1 = top.molecules[mt_index1].name
	mt_name2 = top.molecules[mt_index1+1].name
	print("Merging moleculetype '%s' with '%s'."%(mt_name1, mt_name2))
	merge_moleculetypes(top, mt_name1, mt_name2)

	print("")
	print("The merged topology contains:")
	for m in top.molecules:
		print("  %d molecule(s) of the moleculetype '%s'"%(m.mols, m.name))
	
	
	top_out_fn = options.output_topology
	print("Writing merged topology to "+top_out_fn)
	f = open(top_out_fn, "w")
	f.write(top.write())
	f.close()
	print("DONE")
def main():
    options = options_desc.parse_args(sys.argv)[0]

    pool = Pool()
    needy_nodes = pool.where("state == 'grompp-able'")
    assert (len(needy_nodes) == len(needy_nodes.multilock())
            )  # make sure we lock ALL nodes

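    # map the chosen solvent model to a GROMACS template box (.gro/.pdb) and its include topology (.itp)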
    if (options.solv_model == "tip3p"):
        solv_box = "spc216.gro"
        solv_fn = "tip3p.itp"
    elif (options.solv_model == "tip4p"):
        solv_box = "tip4p.gro"
        solv_fn = "tip4p.itp"
    elif (options.solv_model == "tip4pew"):
        solv_box = "tip4p.gro"
        solv_fn = "tip4pew.itp"
    elif (options.solv_model == "tip5"):
        solv_box = "tip5p.gro"
        solv_fn = "tip5p.itp"
    elif (options.solv_model == "spc"):
        solv_box = "spc216.gro"
        solv_fn = "spc.itp"
    elif (options.solv_model == "spce"):
        solv_box = "spc216.gro"
        solv_fn = "spce.itp"
    elif (
            options.solv_model == "acetonitrile"
    ):  # TODO one might change this one to "custom" and let user enter name of template box
        solv_box = "acetonitrile.pdb"
        msg = "Topology update for acetonitrile is not supported. Proceed?"
        if not (userinput(msg, "bool")):
            for n in needy_nodes:
                n.unlock()
            return ("Quit by user.")

    # determine maximum length of linears, if any
    max_linear = query_linear_length(pool)

    # make box and fill with solvent
    genbox(pool, max_linear, options.bt,
           (options.box_x, options.box_y, options.box_z), solv_box)

    # update topology files (add solvent model and ions includes)
    if not (options.solv_model == "acetonitrile"):
        update_tops(pool, solv_fn)

    for n in needy_nodes:
        n.state = "em-grompp-able"
        zgf_grompp.call_grompp(
            n, mdp_file=options.grompp, final_state="em-mdrun-able"
        )  # re-grompp to get a tpr for energy minimization
        n.unlock()
Example #9
0
def main():
	options = options_desc.parse_args(sys.argv)[0]
	
	pool = Pool()
	needy_nodes = pool.where("state == 'grompp-able'")
	assert(len(needy_nodes) == len(needy_nodes.multilock())) # make sure we lock ALL nodes

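	# map the chosen solvent model to a GROMACS template box (.gro/.pdb) and its include topology (.itp)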
	if(options.solv_model == "tip3p"):
		solv_box = "spc216.gro"
		solv_fn = "tip3p.itp"
	elif(options.solv_model == "tip4p"):
		solv_box = "tip4p.gro"
		solv_fn = "tip4p.itp"
	elif(options.solv_model == "tip4pew"):
		solv_box = "tip4p.gro"
		solv_fn = "tip4pew.itp"
	elif(options.solv_model == "tip5"):
		solv_box = "tip5p.gro"
		solv_fn = "tip5p.itp"
	elif(options.solv_model == "spc"):
		solv_box = "spc216.gro"
		solv_fn = "spc.itp"
	elif(options.solv_model == "spce"):
		solv_box = "spc216.gro"
		solv_fn = "spce.itp"
	elif(options.solv_model == "acetonitrile"): # TODO one might change this one to "custom" and let user enter name of template box
		solv_box = "acetonitrile.pdb"
		msg = "Topology update for acetonitrile is not supported. Proceed?"
		if not(userinput(msg, "bool")):
			for n in needy_nodes:
				n.unlock()
			return("Quit by user.")
	
	# determine maximum length of linears, if any
	max_linear = query_linear_length(pool)

	# make box and fill with solvent
	genbox(pool, max_linear, options.bt, (options.box_x, options.box_y, options.box_z), solv_box)

	# update topology files (add solvent model and ions includes)
	if not(options.solv_model == "acetonitrile"):
		update_tops(pool, solv_fn)

	for n in needy_nodes:
		n.state = "em-grompp-able"
		zgf_grompp.call_grompp(n, mdp_file=options.grompp, final_state="em-mdrun-able") # re-grompp to get a tpr for energy minimization
		n.unlock()
Example #10
0
def main(argv=None):
    if (argv == None):
        argv = sys.argv
    options = options_desc.parse_args(argv)[0]

    assert (not (options.refine_all and options.extend_all))

    pool = Pool()
    needy_nodes = pool.where("isa_partition and is_sampled").multilock()

    # 1. Trying to detect fake convergence
    for n in pool.where("state == 'converged'"):
        means = kmeans(n.trajectory, k=2)
        d = (means[0] - means[1]).norm2()
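        # a large distance between the two k-means centers hints at a bimodal, i.e. fake-converged, distribution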
        if (d > 2.0 and (options.refine_all or userinput(
                "%s has converged but appears to have a bimodal distribution.\nDo you want to refine?"
                % n.name,
                "bool"))):  #TODO decide upon threshold (per coordinate?)
            refine(n, options)

    # 2. Dealing with not-converged nodes
    for n in pool.where("state == 'not-converged'"):
        if (not (options.refine_all or options.extend_all)):
            choice = userchoice(
                "%s has not converged. What do you want to do?" % n.name,
                ['_refine', '_extend', '_ignore'])
        if (options.refine_all or choice == "r"):
            refine(n, options)
        elif (options.extend_all or choice == "e"):
            extend(n)
        elif (choice == "i"):
            continue

    for n in needy_nodes:
        n.save()
        n.unlock()

    zgf_setup_nodes.main()
    zgf_grompp.main()
    zgf_cleanup.main()
def main():
    options = options_desc.parse_args(sys.argv)[0]

    if options.common_filename:
        options.molecule = options.common_filename + ".pdb"
        options.presampling = options.common_filename + ".trr"
        options.internals = options.common_filename + ".int"
        options.grompp = options.common_filename + ".mdp"
        options.topology = options.common_filename + ".top"
        options.index = options.common_filename + ".ndx"

    print("Options:\n%s\n" % pformat(eval(str(options))))

    assert path.exists(options.molecule)
    assert path.exists(options.presampling)
    assert path.exists(options.internals)
    assert path.exists(options.grompp)
    assert path.exists(options.topology)

    # TODO: what if there is no index-file? (make_ndx)
    assert path.exists(options.index)
    assert "MOI" in gromacs.read_index_file(options.index), "group MOI should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    # options we cannot fix
    for ref_t in re.findall("[0-9]+", mdp_options["ref_t"]):
        assert int(ref_t) == options.temperature, "temperature in mdp file does not match ZIBgridfree temperature"
        # TODO drop options.temperature and get temperature directly from mdp file... ask again if temperature is above 310K

    # options we can fix
    mdp_options_dirty = False  # if set, a new mdp-file will be written
    required_mdp_options = {"dihre": "yes", "dihre_fc": "1", "disre": "simple", "disre_fc": "1"}
    for (k, v) in required_mdp_options.items():
        if mdp_options.has_key(k):
            assert mdp_options[k] == v  # check, if we would overwrite something
        else:
            mdp_options[k] = v
            mdp_options_dirty = True

    if mdp_options.has_key("energygrps"):
        assert "MOI" in [
            str(egrp) for egrp in re.findall("[\S]+", mdp_options["energygrps"])
        ], "group MOI should be among energygrps in mdp file"
    else:
        mdp_options["energygrps"] = "MOI"
        mdp_options_dirty = True

    a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
    if a and not b:
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif b and not a:
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True
    elif b and a:
        assert mdp_options["nstxout"] == mdp_options["nstenergy"], "nstxout should equal nstenergy"

    if int(mdp_options["nsteps"]) > 1e6:
        msg = "Number of MD-steps?"
        mdp_options["nsteps"] = str(userinput(msg, "int", default=int(mdp_options["nsteps"])))

    # create a fixed mdp-file
    if mdp_options_dirty:
        print("Creating copy of mdp-file and adding missing options.")
        out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        f = open(out_fn, "w")
        f.write("; Generated by zgf_create_pool\n")
        for i in mdp_options.items():
            f.write("%s = %s\n" % i)
        f.write("; EOF\n")
        f.close()
        options.grompp = out_fn

    # check if subsampling is reasonable
    if os.path.getsize(options.presampling) > 100e6:  # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        dt = trr.first_frame.next().t - trr.first_frame.t
        trr.close()
        print("Presampling timestep is %.2f ps" % dt)
        if dt < 10:  # picoseconds
            # TODO: maybe calculate subsampling factor individually, or ask?
            msg = "Subsample presampling trajectory by a tenth?"
            if userinput(msg, "bool"):
                out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
                cmd = ["trjconv", "-f", options.presampling, "-o", out_fn, "-skip", "10"]
                check_call(cmd)
                options.presampling = out_fn

    # balance linears
    if options.balance_linears:
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)
        new_coord_list = []
        for c in old_converter:
            if not isinstance(c, LinearCoordinate):
                new_coord_list.append(c)
                continue  # we do not work on other Coordinate-Types
            # TODO: is this a good way to determine new_weight and new_offset???
            new_weight = c.weight / sqrt(2 * frames.var().getcoord(c))
            new_offset = c.offset + frames.mean().getcoord(c)
            new_coord = LinearCoordinate(*c.atoms, label=c.label, weight=new_weight, offset=new_offset)
            new_coord_list.append(new_coord)
        new_converter = Converter(coord_list=new_coord_list)

        assert old_converter.filename.endswith(".int")
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: " + options.internals)
        f = open(options.internals, "w")
        f.write(new_converter.serialize())
        f.close()
        assert len(Converter(options.internals)) == len(new_coord_list)  # try parsing

    # Finally: Create root-node and pool
    pool = Pool()
    if len(pool) != 0:
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = options.temperature
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save()  # save pool for the first time...

    # ... then we can save the first node...
    node0 = Node()
    node0.state = "refined"
    node0.save()  # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
    os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

    pool.root_name = node0.name
    pool.save()  # ... now we have to save the pool again.

    if not path.exists("analysis"):
        os.mkdir("analysis")
Example #12
0
def main():
	options = options_desc.parse_args(sys.argv)[0]

	if(options.common_filename):
		options.molecule = options.common_filename+".pdb"
		options.presampling = options.common_filename+".trr"
		options.internals = options.common_filename+".int"
		options.grompp = options.common_filename+".mdp"
		options.topology = options.common_filename+".top"
		options.index = options.common_filename+".ndx"

	print("Options:\n%s\n"%pformat(eval(str(options))))

	assert(path.exists(options.molecule))
	assert(path.exists(options.presampling))
	assert(path.exists(options.internals))
	assert(path.exists(options.grompp))
	assert(path.exists(options.topology))
		
	#TODO: what if there is no index-file? (make_ndx)
	assert(path.exists(options.index))
	assert('moi' in gromacs.read_index_file(options.index)), "group 'MOI' should be defined in index file"
 
	# checks e.g. if the mdp-file looks good
	mdp_options = gromacs.read_mdp_file(options.grompp)
	
	temperatures = [ref_t for ref_t in re.findall("[0-9]+", mdp_options["ref_t"])]
	assert(len(set(temperatures)) == 1), "temperature definition in mdp file is ambiguous"
	temperature = temperatures[0]

	# get sampling temperature from mdp file
	if(int(temperature) > 310):
		if not(userinput("Your sampling temperature is set to %s K. Continue?"%temperature, "bool")):
			sys.exit("Quit by user.")

	# options we can fix
	mdp_options_dirty = False #if set, a new mdp-file will be written

	# the values of the following options need to be fixed
	critical_mdp_options = {"dihre":"yes", "dihre_fc":"1", "disre":"simple", "disre_fc":"1", "gen_temp":temperature}
	for (k,v) in critical_mdp_options.items():
		if(mdp_options.has_key(k) and mdp_options[k].strip() != v):
			print "Error. I do not want to use '%s' for option '%s' ('%s' required). Please fix your mdp file."%(mdp_options[k].strip(),k,v)
			sys.exit("Quitting.")
		else:
			mdp_options[k] = v
			mdp_options_dirty = True

	# the values of the following options do not matter, but they should be present
	noncritical_mdp_options = {"tcoupl":"no", "pcoupl":"no", "gen_vel":"no", "gen_seed":"-1"}
	for (k,v) in noncritical_mdp_options.items():
		if not(mdp_options.has_key(k)):
			mdp_options[k] = v
			mdp_options_dirty = True

	a = mdp_options.has_key("energygrps") and "moi" not in [str(egrp) for egrp in re.findall('[\S]+', mdp_options["energygrps"])]
	b = not(mdp_options.has_key("energygrps"))
	if(a or b):
		if not(userinput("'MOI' is not defined as an energy group in your mdp file. Maybe you have forgotten to define proper 'energygrps'. Continue?", "bool")):
			sys.exit("Quit by user.")

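	# keep nstxout and nstenergy in sync so that coordinates and energies are written at the same MD steps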
	a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
	if(a and not b):
		mdp_options["nstenergy"] = mdp_options["nstxout"]
		mdp_options_dirty = True
	elif(b and not a):
		mdp_options["nstxout"] = mdp_options["nstenergy"]
		mdp_options_dirty = True
	elif(b and a):
		assert(mdp_options["nstxout"] == mdp_options["nstenergy"]), "nstxout should equal nstenergy"
		
	if(int(mdp_options["nsteps"]) > 1e6):
		msg = "Number of MD-steps?"
		mdp_options["nsteps"] = str( userinput(msg, "int", default=int(mdp_options["nsteps"])) )
	
	# create a fixed mdp-file
	if(mdp_options_dirty):
		print("Creating copy of mdp-file and adding missing options.")
		out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
		f = open(out_fn, "w")
		f.write("; Generated by zgf_create_pool\n")
		for i in sorted(mdp_options.items()):
			f.write("%s = %s\n"%i)
		f.write("; EOF\n")
		f.close()
		options.grompp = out_fn
		
	
	# check if subsampling is reasonable
	if(os.path.getsize(options.presampling) > 100e6): # 100MB
		print("Presampling trajectory is large")
		trr = TrrFile(options.presampling)
		dt = trr.first_frame.next().t - trr.first_frame.t
		trr.close()
		print("Presampling timestep is %.2f ps"%dt)
		if(dt < 10): # picoseconds
			#TODO: maybe calculate subsampling factor individually, or ask? 
			msg = "Subsample presampling trajectory by a tenth?"
			if(userinput(msg, "bool")):
				out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
				cmd = ["trjconv", "-f", options.presampling, "-o", out_fn, "-skip", "10"]
				check_call(cmd)
				options.presampling = out_fn
	
			
	# balance linears
	if(options.balance_linears):
		print("Balance Linears")
		old_converter = Converter(options.internals)
		print("Loading presampling....")
		frames = old_converter.read_trajectory(options.presampling)
		new_coord_list = []
		for c in old_converter:
			if(not isinstance(c, LinearCoordinate)):
				new_coord_list.append(c)
				continue # we do not work on other Coordinate-Types
			#TODO: is this a good way to determine new_weight and new_offset??? 
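			# heuristic: damp the coordinate weight by the presampling spread and center the offset on the presampling mean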
			new_weight = c.weight / sqrt(2*frames.var().getcoord(c))
			new_offset = c.offset + frames.mean().getcoord(c)
			new_coord = LinearCoordinate(*c.atoms, label=c.label, weight=new_weight, offset=new_offset)
			new_coord_list.append(new_coord)
		new_converter = Converter(coord_list=new_coord_list)
	
		assert(old_converter.filename.endswith(".int"))
		options.internals = old_converter.filename[:-4] + "_balanced.int"
		print("Writing balanced Converter to: "+options.internals)
		f = open(options.internals, "w")
		f.write(new_converter.serialize())
		f.close()
		assert(len(Converter(options.internals)) == len(new_coord_list)) #try parsing
	
	# Finally: Create root-node and pool
	pool = Pool()
	if(len(pool) != 0):
		print("ERROR: A pool already exists here.")
		sys.exit(1)
	
	pool.int_fn = options.internals
	pool.mdp_fn = options.grompp
	pool.top_fn = options.topology
	pool.ndx_fn = options.index
	pool.temperature = int(temperature)
	pool.gr_threshold = options.gr_threshold
	pool.gr_chains = options.gr_chains
	pool.alpha = None
	pool.save() # save pool for the first time...

	# ... then we can save the first node...
	node0 = Node()
	node0.state = "refined"	
	node0.save() # also creates the node directory ... needed for symlink
	os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
	os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)
	
	pool.root_name = node0.name
	pool.save() #... now we have to save the pool again.
	
	if(not path.exists("analysis")):
		os.mkdir("analysis")
Example #13
0
def main(argv=None):
	if(argv==None): 
		argv = sys.argv
	options = options_desc.parse_args(argv)[0]
	
	print("Options:\n%s\n"%pformat(eval(str(options))))

	if(options.random_seed):
		# using numpy-random because python-random differs between 32 and 64 bit
		np.random.seed(hash(options.random_seed))
	
	pool = Pool()
	old_pool_size = len(pool)
	print "pool", pool
	
	if(options.parent_node == "root"):
		parent = pool.root
	else:
		found = [n for n in pool if n.name == options.parent_node]
		assert(len(found) == 1)
		parent = found[0]
	
	
	print "### Generate nodes: %s ###" % options.methodnodes
	if(options.methodnodes == "kmeans"):
		chosen_idx = mknodes_kmeans(parent, options.numnodes)
	elif(options.methodnodes == "equidist"):
		chosen_idx = mknodes_equidist(parent, options.numnodes)
	elif(options.methodnodes == "maxdist"):
		chosen_idx = mknodes_maxdist(parent, options.numnodes)
	elif(options.methodnodes == "all"):
		chosen_idx = mknodes_all(parent)
	else:
		raise(Exception("Method unknown: "+options.methodnodes))

	chosen_idx.sort() # makes preview-trajectory easier to understand 
	if(options.write_preview):
		write_node_preview(pool, parent, chosen_idx)
	
	for i in chosen_idx:
		n = Node()
		n.parent_frame_num = i
		n.parent = parent
		n.state = "creating-a-partition" # will be set to "created" at end of script
		n.extensions_counter = 0
		n.extensions_max = options.ext_max
		n.extensions_length = options.ext_length
		n.sampling_length = options.sampling_length	
		n.internals = parent.trajectory.getframe(i)
		pool.append(n)
		
	print "\n### Obtain alpha: %s ###" % options.methodalphas
	old_alpha = pool.alpha
	if(options.methodalphas == "theta"):
		pool.alpha = calc_alpha_theta(pool)
	elif(options.methodalphas == "user"):
		pool.alpha = userinput("Please enter a value for alpha", "float")
	else:
		raise(Exception("Method unknown: "+options.methodalphas))
	
	pool.history.append({'refined_node': (parent.name, parent.state), 'size':old_pool_size, 'alpha':old_alpha, 'timestamp':datetime.now()})
	
	pool.save() # alpha might have changed
	
	print "\n### Obtain phi fit: %s ###" % options.methodphifit
	if(options.methodphifit == "harmonic"):
		do_phifit_harmonic(pool)
	elif(options.methodphifit == "switch"):
		do_phifit_switch(pool)
	elif(options.methodphifit == "leastsq"):
		do_phifit_leastsq(pool)
	else:
		raise(Exception("Method unkown: "+options.methodphifit))

	for n in pool.where("state == 'creating-a-partition'"):
		n.state = "created"
		n.save()
		print "saving " +str(n)
		
	zgf_cleanup.main()
Example #14
0
def main():
	options = options_desc.parse_args(sys.argv)[0]

	zgf_cleanup.main()
	
	pool = Pool()
	active_nodes = pool.where("isa_partition")
	if(options.ignore_failed):
			active_nodes = pool.where("isa_partition and not state=='mdrun-failed'")

	assert(len(active_nodes) == len(active_nodes.multilock())) # make sure we lock ALL nodes

	if active_nodes.where("'weight_direct' not in obs"):
		active_nodes.unlock()
		sys.exit("Matrix calculation not possible: Not all of the nodes have been reweighted.")
	
	print "\n### Getting S matrix ..."
	s_matrix = cache_matrix(pool.s_mat_fn, active_nodes, overwrite=options.overwrite_mat, fast=options.fast_mat)
	register_file_dependency(pool.s_mat_fn, pool.filename)

	node_weights = np.array([node.obs.weight_direct for node in active_nodes])
	
	print "\n### Symmetrizing S matrix ..."
	(corr_s_matrix, corr_node_weights) = symmetrize(s_matrix, node_weights, correct_weights=True, error=float(options.error))

	# store intermediate results
	register_file_dependency(pool.s_corr_mat_fn, pool.s_mat_fn)

	np.savez(pool.s_corr_mat_fn, matrix=corr_s_matrix, node_names=[n.name for n in active_nodes])
	
	if options.export_matlab:
		savemat(pool.analysis_dir+"node_weights.mat", {"node_weights":node_weights, "node_weights_corrected":corr_node_weights})
		savemat(pool.analysis_dir+"s_mats.mat", {"s_matrix":s_matrix, "s_matrix_corrected":corr_s_matrix})

	for (n, cw) in zip(active_nodes, corr_node_weights):
		n.obs.weight_corrected = cw
		
	print "\n### Node weights after symmetrization of S matrix:"
	for n in active_nodes:
		print "%s: initial weight: %f, corrected weight: %f, weight change: %f" % (n.name, n.obs.weight_direct, n.obs.weight_corrected, abs(n.obs.weight_direct - n.obs.weight_corrected))
		n.save()

	active_nodes.unlock()

	# calculate and sort eigenvalues in descending order
	(eigvalues, eigvectors) = np.linalg.eig(corr_s_matrix)
	argsorted_eigvalues = np.argsort(-eigvalues)
	eigvalues = eigvalues[argsorted_eigvalues]
	eigvectors = eigvectors[:, argsorted_eigvalues]
	
	gaps = np.abs(eigvalues[1:]-eigvalues[:-1])
	gaps = np.append(gaps, 0.0)
	wgaps = gaps*eigvalues
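	# heuristic: n_clusters below defaults to the position of the largest eigenvalue-weighted spectral gap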

	print "\n### Sorted eigenvalues of symmetrized S matrix:"
	for (idx, ev, gap, wgap) in zip(range(1, len(eigvalues)+1), eigvalues, gaps, wgaps):
		print "EV%04d: %f, gap to next: %f, EV-weighted gap to next: %f" % (idx, ev, gap, wgap)
	n_clusters = np.argmax(wgaps)+1
	print "\n### Maximum gap %f after top %d eigenvalues." % (np.max(gaps), n_clusters)
	print "### Maximum EV-weighted gap %f after top %d eigenvalues." % (np.max(wgaps), np.argmax(wgaps)+1)
	sys.stdout.flush()
	if not options.auto_cluster:
		n_clusters = userinput("Please enter the number of clusters for PCCA+", "int", "x>0")
	print "### Using %d clusters for PCCA+ ..."%n_clusters

	if options.export_matlab:
		savemat(pool.analysis_dir+"evs.mat", {"evs":eigvectors})
	
	# orthogonalize and normalize eigenvectors 
	eigvectors = orthogonalize(eigvalues, eigvectors, corr_node_weights)

	# perform PCCA+
	# First two return-values "c_f" and "indicator" are not needed
	(chi_matrix, rot_matrix) = cluster_by_isa(eigvectors, n_clusters)[2:]

	if(options.optimize_chi):
		print "\n### Optimizing chi matrix ..."
		
		outliers = 5
		mean_weight = np.mean(corr_node_weights)
		threshold = mean_weight/100*outliers
		print "Light-weight node threshold (%d%% of mean corrected node weight): %.4f."%(outliers, threshold)

		# accumulate nodes for optimization
		edges = np.where(np.max(chi_matrix, axis=1) > 0.9999)[0] # edges of simplex
		heavies = np.where( corr_node_weights > threshold)[0] # heavy-weight nodes
		filtered_eigvectors = eigvectors[ np.union1d(edges, heavies) ]

		# perform the actual optimization
		rot_matrix = opt_soft(filtered_eigvectors, rot_matrix, n_clusters)

		chi_matrix = np.dot(eigvectors[:,:n_clusters], rot_matrix)
		
		# deal with light-weight nodes: shift and scale
		for i in np.where(corr_node_weights <= threshold)[0]:
			if(i in edges):
				print "Row %d belongs to (potentially dangerous) light-weight node, but its node is a simplex edge."%(i+1)
				continue
			print "Row %d is shifted and scaled."%(i+1)
			row_min = np.min( chi_matrix[i,:] )
			chi_matrix[i,:] -= row_min
			chi_matrix[i,:] /= 1-(n_clusters*row_min)
			
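	# coarse-grained matrix: Q_c = R^-1 * diag(top n_clusters eigenvalues) * R - I, with R the PCCA+ rotation matrix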
	qc_matrix = np.dot( np.dot( np.linalg.inv(rot_matrix), np.diag(eigvalues[range(n_clusters)]) ), rot_matrix ) - np.eye(n_clusters)
	cluster_weights = rot_matrix[0]
	
	print "\n### Matrix numerics check"
	print "-- Q_c matrix row sums --"
	print np.sum(qc_matrix, axis=1)
	print "-- cluster weights: first column of rot_matrix --"
	print cluster_weights
	print "-- cluster weights: numpy.dot(node_weights, chi_matrix) --"
	print np.dot(corr_node_weights, chi_matrix)
	print "-- chi matrix column max values --"
	print np.max(chi_matrix, axis=0)
	print "-- chi matrix row sums --"
	print np.sum(chi_matrix, axis=1)

	# store final results
	np.savez(pool.chi_mat_fn, matrix=chi_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes])
	np.savez(pool.qc_mat_fn,  matrix=qc_matrix,  n_clusters=n_clusters, node_names=[n.name for n in active_nodes], weights=cluster_weights)

	if options.export_matlab:		
		savemat(pool.analysis_dir+"chi_mat.mat", {"chi_matrix":chi_matrix})
		savemat(pool.analysis_dir+"qc_mat.mat", {"qc_matrix":qc_matrix, "weights":cluster_weights})

	register_file_dependency(pool.chi_mat_fn, pool.s_corr_mat_fn)
	register_file_dependency(pool.qc_mat_fn, pool.s_corr_mat_fn)

	for fn in (pool.s_mat_fn, pool.s_corr_mat_fn):
		register_file_dependency(pool.chi_mat_fn, fn)
		register_file_dependency(pool.qc_mat_fn, fn)

	# touch analysis directory (triggering update in zgf_browser)
	atime = mtime = time.time()
	os.utime(pool.analysis_dir, (atime, mtime))

	# show summary
	if(options.summary):
		print "\n### Preparing cluster summary ..."
		chi_threshold = 1E-3
		from pprint import pformat
	
		for i in range(n_clusters):
			involved_nodes = [active_nodes[ni] for ni in np.argwhere(chi_matrix[:,i] > chi_threshold)]
			max_chi_node = active_nodes[ np.argmax(chi_matrix[:,i]) ]
			c_max = []

			for c in  pool.converter:
				coord_range = pool.coord_range(c)
				scale = c.plot_scale
				edges = scale(np.linspace(np.min(coord_range), np.max(coord_range), num=50))
				hist_cluster = np.zeros(edges.size-1)

				for (n, chi) in zip([n for n in active_nodes], chi_matrix[:,i]):
					samples = scale( n.trajectory.getcoord(c) )
					hist_node = np.histogram(samples, bins=edges, weights=n.frameweights, normed=True)[0]
					hist_cluster += n.obs.weight_corrected * hist_node * chi

				c_max.append( scale(np.linspace(np.min(coord_range), np.max(coord_range), num=50))[np.argmax(hist_cluster)] )

			msg = "### Cluster %d (weight=%.4f, #involved nodes=%d, representative='%s'):"%(i+1, cluster_weights[i], len(involved_nodes), max_chi_node.name)
			print "\n"+msg
			print "-- internal coordinates --"
			print "%s"%pformat(["%.2f"%cm for cm in c_max])
			print "-- involved nodes --"
			print "%s"%pformat([n.name for n in involved_nodes])			
			print "-"*len(msg)
Example #15
0
def main():
    options = options_desc.parse_args(sys.argv)[0]

    if (options.common_filename):
        options.molecule = options.common_filename + ".pdb"
        options.presampling = options.common_filename + ".trr"
        options.internals = options.common_filename + ".int"
        options.grompp = options.common_filename + ".mdp"
        options.topology = options.common_filename + ".top"
        options.index = options.common_filename + ".ndx"

    print("Options:\n%s\n" % pformat(eval(str(options))))

    assert (path.exists(options.molecule))
    assert (path.exists(options.presampling))
    assert (path.exists(options.internals))
    assert (path.exists(options.grompp))
    assert (path.exists(options.topology))

    #TODO: what if there is no index-file? (make_ndx)
    assert (path.exists(options.index))
    assert ('moi' in gromacs.read_index_file(
        options.index)), "group 'MOI' should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    temperatures = [
        ref_t for ref_t in re.findall("[0-9]+", mdp_options["ref_t"])
    ]
    assert (len(set(temperatures)) == 1
            ), "temperature definition in mdp file is ambiguous"
    temperature = temperatures[0]

    # get sampling temperature from mdp file
    if (int(temperature) > 310):
        if not (userinput(
                "Your sampling temperature is set to %s K. Continue?" %
                temperature, "bool")):
            sys.exit("Quit by user.")

    # options we can fix
    mdp_options_dirty = False  #if set, a new mdp-file will be written

    # the value of the following options need to be fixed
    critical_mdp_options = {
        "dihre": "yes",
        "dihre_fc": "1",
        "disre": "simple",
        "disre_fc": "1",
        "gen_temp": temperature
    }
    for (k, v) in critical_mdp_options.items():
        if (mdp_options.has_key(k) and mdp_options[k].strip() != v):
            print "Error. I do not want to use '%s' for option '%s' ('%s' required). Please fix your mdp file." % (
                mdp_options[k].strip(), k, v)
            sys.exit("Quitting.")
        else:
            mdp_options[k] = v
            mdp_options_dirty = True

    # the value of the following options does not matter, but they should be there
    noncritical_mdp_options = {
        "tcoupl": "no",
        "pcoupl": "no",
        "gen_vel": "no",
        "gen_seed": "-1"
    }
    for (k, v) in noncritical_mdp_options.items():
        if not (mdp_options.has_key(k)):
            mdp_options[k] = v
            mdp_options_dirty = True

    a = mdp_options.has_key("energygrps") and "moi" not in [
        str(egrp) for egrp in re.findall('[\S]+', mdp_options["energygrps"])
    ]
    b = not (mdp_options.has_key("energygrps"))
    if (a or b):
        if not (userinput(
                "'MOI' is not defined as an energy group in your mdp file. Maybe you have forgotten to define proper 'energygrps'. Continue?",
                "bool")):
            sys.exit("Quit by user.")

    a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
    if (a and not b):
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif (b and not a):
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True
    elif (b and a):
        assert (mdp_options["nstxout"] == mdp_options["nstenergy"]
                ), "nstxout should equal nstenergy"

    if (int(mdp_options["nsteps"]) > 1e6):
        msg = "Number of MD-steps?"
        mdp_options["nsteps"] = str(
            userinput(msg, "int", default=int(mdp_options["nsteps"])))

    # create a fixed mdp-file
    if (mdp_options_dirty):
        print("Creating copy of mdp-file and adding missing options.")
        out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        f = open(out_fn, "w")
        f.write("; Generated by zgf_create_pool\n")
        for i in sorted(mdp_options.items()):
            f.write("%s = %s\n" % i)
        f.write("; EOF\n")
        f.close()
        options.grompp = out_fn

    # check if subsampling is reasonable
    if (os.path.getsize(options.presampling) > 100e6):  # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        dt = trr.first_frame.next().t - trr.first_frame.t
        trr.close()
        print("Presampling timestep is %.2f ps" % dt)
        if (dt < 10):  # picoseconds
            #TODO: maybe calculate subsampling factor individually, or ask?
            msg = "Subsample presampling trajectory by a tenth?"
            if (userinput(msg, "bool")):
                out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
                cmd = [
                    "trjconv", "-f", options.presampling, "-o", out_fn,
                    "-skip", "10"
                ]
                check_call(cmd)
                options.presampling = out_fn

    # balance linears
    if (options.balance_linears):
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)
        new_coord_list = []
        for c in old_converter:
            if (not isinstance(c, LinearCoordinate)):
                new_coord_list.append(c)
                continue  # we do not work on other Coordinate-Types
            #TODO: is this a good way to determine new_weight and new_offset???
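            # heuristic: damp the coordinate weight by the presampling spread and center the offset on the presampling mean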
            new_weight = c.weight / sqrt(2 * frames.var().getcoord(c))
            new_offset = c.offset + frames.mean().getcoord(c)
            new_coord = LinearCoordinate(*c.atoms,
                                         label=c.label,
                                         weight=new_weight,
                                         offset=new_offset)
            new_coord_list.append(new_coord)
        new_converter = Converter(coord_list=new_coord_list)

        assert (old_converter.filename.endswith(".int"))
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: " + options.internals)
        f = open(options.internals, "w")
        f.write(new_converter.serialize())
        f.close()
        assert (len(Converter(options.internals)) == len(new_coord_list)
                )  #try parsing

    # Finally: Create root-node and pool
    pool = Pool()
    if (len(pool) != 0):
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = int(temperature)
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save()  # save pool for the first time...

    # ... then we can save the first node...
    node0 = Node()
    node0.state = "refined"
    node0.save()  # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
    os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

    pool.root_name = node0.name
    pool.save()  #... now we have to save the pool again.

    if (not path.exists("analysis")):
        os.mkdir("analysis")
Example #16
0
def main():
	options = options_desc.parse_args(sys.argv)[0]

	zgf_cleanup.main()
	
	pool = Pool()
	active_nodes = pool.where("isa_partition")
	
	assert(len(active_nodes) == len(active_nodes.multilock())) # make sure we lock ALL nodes

	if active_nodes.where("'weight_direct' not in obs"):
		active_nodes.unlock()
		sys.exit("Matrix calculation not possible: Not all of the nodes have been reweighted.")
	
	print "\n### Getting S matrix ..."
	s_matrix = cache_matrix(pool.s_mat_fn, active_nodes, overwrite=options.overwrite_mat)
	register_file_dependency(pool.s_mat_fn, pool.filename)

	print "\n### Getting K matrix ..."
	k_matrix = cache_matrix(pool.k_mat_fn, active_nodes, shift=options.lag_time, overwrite=options.overwrite_mat)
	register_file_dependency(pool.k_mat_fn, pool.filename)	

	node_weights = np.array([node.obs.weight_direct for node in active_nodes])
	
	print "\n### Symmetrizing S matrix ..."
	(corr_s_matrix, corr_node_weights) = symmetrize(s_matrix, node_weights, correct_weights=True, error=float(options.error))
	print "\n### Symmetrizing K matrix ..."
	(corr_k_matrix, corr_node_weights) = symmetrize(k_matrix, corr_node_weights)

	# store intermediate results
	register_file_dependency(pool.s_corr_mat_fn, pool.s_mat_fn)
	register_file_dependency(pool.k_corr_mat_fn, pool.k_mat_fn)
	np.savez(pool.s_corr_mat_fn, matrix=corr_s_matrix, node_names=[n.name for n in active_nodes])
	np.savez(pool.k_corr_mat_fn, matrix=corr_k_matrix, node_names=[n.name for n in active_nodes])
	
	if options.export_matlab:
		savemat(pool.analysis_dir+"node_weights.mat", {"node_weights":node_weights, "node_weights_corrected":corr_node_weights})
		savemat(pool.analysis_dir+"s_mats.mat", {"s_matrix":s_matrix, "s_matrix_corrected":corr_s_matrix})
		savemat(pool.analysis_dir+"k_mats.mat", {"k_matrix":k_matrix, "k_matrix_corrected":corr_k_matrix})
	
	for (n, cw) in zip(active_nodes, corr_node_weights):
		n.obs.weight_corrected = cw
		
	print "\n### Node weights after symmetrization of S matrix:"
	for n in active_nodes:
		print "%s: initial weight: %f, corrected weight: %f, weight change: %f" % (n.name, n.obs.weight_direct, n.obs.weight_corrected, abs(n.obs.weight_direct - n.obs.weight_corrected))
		n.save()

	active_nodes.unlock()

	# calculate and sort eigenvalues in descending order
	(eigvalues, eigvectors) = np.linalg.eig(corr_s_matrix)
	argsorted_eigvalues = np.argsort(-eigvalues)
	eigvalues = eigvalues[argsorted_eigvalues]
	eigvectors = eigvectors[:, argsorted_eigvalues]
	
	gaps = np.abs(eigvalues[1:]-eigvalues[:-1])
	gaps = np.append(gaps, 0.0)
	wgaps = gaps*eigvalues
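	# heuristic: n_clusters below defaults to the position of the largest eigenvalue-weighted spectral gap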

	print "\n### Sorted eigenvalues of symmetrized S matrix:"
	for (idx, ev, gap, wgap) in zip(range(1, len(eigvalues)+1), eigvalues, gaps, wgaps):
		print "EV%04d: %f, gap to next: %f, EV-weighted gap to next: %f" % (idx, ev, gap, wgap)
	n_clusters = np.argmax(wgaps)+1
	print "\n### Maximum gap %f after top %d eigenvalues." % (np.max(gaps), n_clusters)
	print "### Maximum EV-weighted gap %f after top %d eigenvalues." % (np.max(wgaps), np.argmax(wgaps)+1)
	sys.stdout.flush()
	if not options.auto_cluster:
		n_clusters = userinput("Please enter the number of clusters for PCCA+", "int", "x>0")
	print "### Using %d clusters for PCCA+ ..."%n_clusters

	print "eigenvectors"
	print eigvectors[:, :n_clusters]

	if options.export_matlab:
		savemat(pool.analysis_dir+"evs.mat", {"evs":eigvectors})
	
	# orthogonalize and normalize eigenvectors 
	eigvectors = orthogonalize(eigvalues, eigvectors, corr_node_weights)

	# perform PCCA+
	# First two return-values "c_f" and "indicator" are not needed
	(chi_matrix, rot_matrix) = cluster_by_isa(eigvectors, n_clusters)[2:]
	
	#TODO at the moment, K-matrix is not used
	#xi = [] # calculate eigenvalues of Q_c, xi
	#for eigvec in np.transpose(eigvectors)[: n_clusters]:
	#	num = np.dot( np.dot( np.transpose(eigvec), corr_k_matrix ), eigvec )
	#	denom = np.dot( np.dot( np.transpose(eigvec), corr_s_matrix ), eigvec )
	#	xi.append(num/denom-1)

	#print np.diag(xi) #TODO what does this tell us? Marcus-check

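	# coarse-grained matrix: Q_c = R^-1 * diag(top n_clusters eigenvalues) * R - I, with R the PCCA+ rotation matrix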
	qc_matrix = np.dot( np.dot( np.linalg.inv(rot_matrix), np.diag(eigvalues[range(n_clusters)]) ), rot_matrix ) - np.eye(n_clusters)

	cluster_weights = rot_matrix[0]

	print "Q_c matrix:"
	print qc_matrix
	print "Q_c matrix row sums:"
	print np.sum(qc_matrix, axis=1)
	print "cluster weights (calculated twice for checking):"
	print cluster_weights
	print np.dot(corr_node_weights, chi_matrix)
	print "chi matrix column sums:"
	print np.sum(chi_matrix, axis=0)
	print "chi matrix row sums:"
	print np.sum(chi_matrix, axis=1)

	# store final results
	np.savez(pool.chi_mat_fn, matrix=chi_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes])
	np.savez(pool.qc_mat_fn,  matrix=qc_matrix,  n_clusters=n_clusters, node_names=[n.name for n in active_nodes], weights=cluster_weights)

	if options.export_matlab:
		
		savemat(pool.analysis_dir+"chi_mat.mat", {"chi_matrix":chi_matrix})
		savemat(pool.analysis_dir+"qc_mat.mat", {"qc_matrix":qc_matrix, "weights":cluster_weights})

	register_file_dependency(pool.chi_mat_fn, pool.s_corr_mat_fn)
	register_file_dependency(pool.qc_mat_fn, pool.s_corr_mat_fn)
	for fn in (pool.s_mat_fn, pool.s_corr_mat_fn, pool.k_mat_fn, pool.k_corr_mat_fn):
		register_file_dependency(pool.chi_mat_fn, fn)
		register_file_dependency(pool.qc_mat_fn, fn)
		
	zgf_cleanup.main()