Example #1
0
def write_node_preview(pool, parent, chosen_idx):
    """
    Write selected frames of a parent node's trajectory to a preview pdb.

    The frames listed in chosen_idx are copied from parent.trr_fn into a
    temporary trr-file. The external trjconv command then converts that
    file into "node_preview_from_<parent.name>.pdb" and is told to select
    the group "MOI" via its standard input.

    @param pool: object providing ndx_fn, the index file handed to trjconv.
    @param parent: node providing trr_fn, pdb_fn and name.
    @param chosen_idx: frame numbers to extract, sorted ascendingly.
    @raise AssertionError: if chosen_idx is not sorted, if a requested frame
        number is not met while stepping forward, or if trjconv exits with a
        non-zero return code.
    """
    # local import: mkstemp creates the file atomically, unlike mktemp
    from tempfile import mkstemp

    assert(chosen_idx == sorted(chosen_idx))

    print("chosen_idx %s" % (chosen_idx,))
    (tmp_fd, trr_out_tmp_fn) = mkstemp(suffix='.trr')
    try:
        trr_out_tmp = os.fdopen(tmp_fd, "wb")
        try:
            trr_in = TrrFile(parent.trr_fn)
            try:
                # the trajectory is only walked forward, hence the sorted input
                curr_frame = trr_in.first_frame
                for i in chosen_idx:
                    for dummy in range(i - curr_frame.number):
                        curr_frame = curr_frame.next()
                    assert(curr_frame.number == i)
                    trr_out_tmp.write(curr_frame.raw_data)
            finally:
                trr_in.close()
        finally:
            trr_out_tmp.close()

        node_preview_fn = "node_preview_from_" + parent.name + ".pdb"
        cmd = ["trjconv", "-f", trr_out_tmp_fn, "-o", node_preview_fn,
               "-s", parent.pdb_fn, "-n", pool.ndx_fn]
        p = Popen(cmd, stdin=PIPE)
        p.communicate(input="MOI\n")
        assert(p.wait() == 0)
    finally:
        # remove the temporary trajectory even if something above failed
        os.remove(trr_out_tmp_fn)

    print("Node preview (MOI only) written to file: %s" % node_preview_fn)
Example #2
0
def get_merged_edr(node):
    """
    Merge the per-run edr-files of a node into "<node.dir>/ener.edr".

    The start time of every edr-file but the first is rewritten in place
    with "eneconv -settime", then all files are concatenated with eneconv.
    The function returns nothing; the result is the written file.

    @param node: node providing dir, trr_fn, extensions_max,
        sampling_length and extensions_length.
    @raise AssertionError: if the number of edr-files found differs from
        node.extensions_max + 1 or if an eneconv call exits non-zero.
    """
    # get list of edr files; names starting with '#' are skipped
    # (presumably backup copies - confirm)
    edr_fnames = sorted(
        node.dir + "/" + fn for fn in os.listdir(node.dir)
        if re.match(r"[^#].+run\d+\.edr$", fn))
    assert(len(edr_fnames) == node.extensions_max + 1)

    # find out about trr time step
    trr = TrrFile(node.trr_fn)
    try:
        dt = trr.first_frame.next().t - trr.first_frame.t
    finally:
        trr.close()
    # dt is sometimes noisy in the final digits (three digits is femtosecond step = enough)
    dt = np.around(dt, decimals=3)

    time_offset = node.sampling_length + dt

    for edr_fn in edr_fnames[1:]:
        # adapt edr starting times; the new start time is fed via stdin
        cmd = ["eneconv", "-f", edr_fn, "-o", edr_fn, "-settime"]
        print("Calling: " + (" ".join(cmd)))
        p = Popen(cmd, stdin=PIPE)
        p.communicate(input=(str(time_offset) + "\n"))
        assert(p.wait() == 0)

        time_offset += node.extensions_length + dt

    # concatenate edr files with adapted starting times
    cmd = ["eneconv", "-f"] + edr_fnames + ["-o", node.dir + "/ener.edr"]
    print("Calling: " + (" ".join(cmd)))
    p = Popen(cmd)
    retcode = p.wait()
    assert(retcode == 0)
Example #3
0
    def read_trajectory(self, fn):
        """
        Reads a trr-trajectory, resolves periodic boundary conditions
        with L{PbcResolver} and calculates internal coordinates.

        Only the atom range from the smallest to the largest atom index
        used by the coordinates of this object is requested from the file.

        @param fn: filename of a gromacs trr trajectory.
        @return: L{InternalArray}
        @raise ValueError: if this object holds no coordinates, as min()
            of an empty set fails.
        """
        # every atom index referenced by any coordinate, collected in one pass
        required_atoms = set(a for c in self for a in c.atoms)
        atoms_start = min(required_atoms)
        atoms_end = max(required_atoms) + 1
        f_trr = TrrFile(fn)
        try:
            (frames_x, frames_box) = f_trr.read_frames(atoms_start,
                                                       atoms_end,
                                                       read_boxes=True)
        finally:
            # release the file even if reading fails
            f_trr.close()
        pbc = PbcResolver(frames_box)

        def dx_provider(atom1, atom2):
            # frames_x only holds the requested atom range, so absolute
            # atom indices are shifted by atoms_start
            ai = frames_x[:, atom1 - atoms_start, :]
            aj = frames_x[:, atom2 - atoms_start, :]
            return (pbc.rvec_sub(ai, aj))

        # one column (block) per coordinate, in iteration order of self
        array = np.column_stack([c.from_externals(dx_provider) for c in self])
        return (InternalArray(self, array))
Example #4
0
def main():
    """
    Command line entry point: convert a trr-trajectory into a mat-file.

    Expects exactly two command line arguments, the trr input file and the
    mat output file. With any other argument count a usage text is printed
    and the process exits with status 1.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Takes a trr-trajectory and converts it into a mat-file for matlab.")
        print("Usage: trr2mat.py <trr-input-file> <mat-output-file>")
        sys.exit(1)

    trr_fn, mat_fn = cli_args

    # load all frames of the trajectory
    print("Opening: %s"%trr_fn)
    trajectory = TrrFile(trr_fn)
    data = trajectory.read_frames()
    trajectory.close()
    print("Loaded trr-file with shape: "+str(data.shape))

    # store them under the key "trr"
    print("Writing: %s"%mat_fn)
    savemat(mat_fn, {"trr":data})
Example #5
0
def extract_frames(pool):
    """
    Write the start pdb of every node in state 'created'.

    For each such node the frame numbered node.parent_frame_num is copied
    from the parent's trr-file into a temporary trr-file, which the external
    trjconv command converts into node.pdb_fn (group "System" is selected
    via stdin). Afterwards each written pdb is read back and compared with
    the internal coordinates stored for the node.

    @param pool: pool providing where() and converter.read_pdb().
    @raise AssertionError: if a requested frame number is not met while
        stepping forward, if trjconv exits non-zero, or if the largest
        absolute deviation between pdb and node internals is not below 1e-2.
    """
    # local import: mkstemp creates the file atomically, unlike mktemp
    from tempfile import mkstemp

    needy_nodes = pool.where("state == 'created'")

    # we want to scan through a parent-trr only once - saves time
    parents = set([n.parent for n in needy_nodes])
    for parent in parents:
        childs = [n for n in needy_nodes if n.parent == parent]
        # ascending frame order, as the trajectory is only walked forward
        childs.sort(key=lambda x: x.parent_frame_num)
        trr_in = TrrFile(parent.trr_fn)
        try:
            frame = trr_in.first_frame
            for n in childs:
                for dummy in range(n.parent_frame_num - frame.number):
                    frame = frame.next()
                assert (frame.number == n.parent_frame_num)
                (tmp_fd, trr_tmp_fn) = mkstemp(suffix='.trr')
                try:
                    trr_tmp = os.fdopen(tmp_fd, "wb")
                    try:
                        trr_tmp.write(frame.raw_data)
                    finally:
                        trr_tmp.close()
                    cmd = [
                        "trjconv", "-f", trr_tmp_fn, "-o", n.pdb_fn, "-s",
                        n.parent.pdb_fn
                    ]
                    proc = Popen(cmd, stdin=PIPE)
                    proc.communicate(input="System\n")
                    assert (proc.wait() == 0)
                finally:
                    # remove the temporary frame even if trjconv failed
                    os.remove(trr_tmp_fn)
        finally:
            trr_in.close()

    # Check if the right frames were extracted
    # In principle, PDB coordinates should have a precision of 1e-4 nm
    # because they are given in Angstrom with three decimal places.

    for n in needy_nodes:
        a = pool.converter.read_pdb(n.pdb_fn)
        d = np.max(np.abs(n.internals.array - a.array))
        print(n.name + ": pdb vs internals deviation: %.2e" % d)
        assert (1e-2 > d)
def main():
    """
    Write one trr-trajectory per cluster holding typical frames of its nodes.

    The chi matrix, node names and cluster count are loaded from
    pool.chi_mat_fn. A node belongs to every cluster whose chi value for
    that node exceeds options.node_threshold. A frame is typical if its
    weight exceeds options.frame_threshold * 2 * mean(frameweights) of the
    node. Typical frames are appended to "cluster<k>.trr" of each belonging
    cluster. Unless options.write_sol is set, every cluster trajectory is
    then rewritten by trjconv with group "MOI" selected. Finally the frame
    counts are verified and a summary is printed.

    @raise AssertionError: if trjconv exits non-zero or a written
        trajectory does not hold the counted number of frames.
    """
    options = options_desc.parse_args(sys.argv)[0]

    zgf_cleanup.main()

    pool = Pool()
    npz_file = np.load(pool.chi_mat_fn)
    chi_matrix = npz_file['matrix']
    node_names = npz_file['node_names']
    # the loaded value may be a 0-d array; use a plain int for range() etc.
    n_clusters = int(npz_file['n_clusters'])
    active_nodes = [Node(nn) for nn in node_names]

    # create and open dest_files, initialize counters for statistics
    dest_filenames = [
        pool.analysis_dir + "cluster%d.trr" % (c + 1)
        for c in range(n_clusters)
    ]
    dest_frame_counters = np.zeros(n_clusters)
    dest_files = []
    try:
        for fn in dest_filenames:
            dest_files.append(open(fn, "wb"))

        # For each active node...
        for (i, n) in enumerate(active_nodes):
            # ... find the clusters to which it belongs (might be more than one)...
            # flat integer indices, so they can index the dest_files list
            belonging_clusters = np.flatnonzero(
                chi_matrix[i] > options.node_threshold)

            # ... and find all typical frames of this node.
            #TODO not an optimal solution... discuss
            # per default, we take every frame with above average weight
            frame_threshold = options.frame_threshold * 2 * np.mean(
                n.frameweights)
            typical_frame_nums = np.flatnonzero(
                n.frameweights > frame_threshold)

            # Go through the node's trajectory ...
            trr_in = TrrFile(n.trr_fn)
            try:
                curr_frame = trr_in.first_frame
                for frame_num in typical_frame_nums:
                    # ...stop at each typical frame...
                    while (frame_num != curr_frame.number):
                        curr_frame = curr_frame.next()
                    #... and copy it into the dest_file of each belonging cluster.
                    for c in belonging_clusters:
                        dest_files[c].write(curr_frame.raw_data)
                        dest_frame_counters[c] += 1
            finally:
                trr_in.close()  # close source file
    finally:
        # close dest_files, also when extraction failed half-way
        for f in dest_files:
            f.close()

    # desolvate cluster-trajectories 'in-place'
    if (not options.write_sol):
        for dest_fn in dest_filenames:
            tmp_fn = mktemp(suffix='.trr', dir=pool.analysis_dir)
            os.rename(dest_fn, tmp_fn)  # works as both files are in same dir
            cmd = ["trjconv", "-f", tmp_fn, "-o", dest_fn, "-n", pool.ndx_fn]
            proc = Popen(cmd, stdin=PIPE)
            proc.communicate(input="MOI\n")
            # if trjconv fails, tmp_fn is not removed and still holds the
            # solvated frames
            assert (proc.wait() == 0)
            os.remove(tmp_fn)

    # register dependencies
    for fn in dest_filenames:
        register_file_dependency(fn, pool.chi_mat_fn)

    # check number of written frames
    sys.stdout.write("Checking lenghts of written trajectories... ")
    for (fn, expected_frames) in zip(dest_filenames, dest_frame_counters):
        f = TrrFile(fn)
        try:
            assert (f.count_frames() == expected_frames)
        finally:
            f.close()
    print("done.")

    #output statistics
    print("\n### Extraction summary ###\nnode threshold: %1.1f, frame threshold: %1.1f" % (
        options.node_threshold, options.frame_threshold))
    print("Cluster trajectories were written to %s:" % pool.analysis_dir)
    for (c, f) in enumerate(dest_frame_counters):
        print("cluster%d.trr [%d frames] from node(s):" % (c + 1, f))
        print(list(np.argwhere(chi_matrix[:, c] > options.node_threshold).flat))
Example #7
0
def main():
    """
    Create a new pool and its root node from presampling input files.

    Steps, in order:
      1. Derive all input filenames from options.common_filename if given.
      2. Check that the input files exist and that the index file defines
         the group 'moi'.
      3. Validate the mdp options; missing ones are added and the result is
         written to "<grompp>_fixed.mdp", which then replaces
         options.grompp.
      4. Offer to subsample a presampling trajectory larger than 100e6
         bytes whose time step is below 10.
      5. If options.balance_linears is set, rescale all LinearCoordinates
         using variance and mean of the presampling and write the result to
         "<internals>_balanced.int".
      6. Create and save the pool and the root node, symlink presampling
         trajectory and molecule into the node, create "analysis".

    The process exits early if the user declines a prompt, if an mdp option
    has a forbidden value, or if a pool already exists.

    @raise AssertionError: if an input file is missing, the index group is
        undefined, the mdp temperature is ambiguous, nstxout differs from
        nstenergy, or the balanced converter cannot be parsed back.
    """
    options = options_desc.parse_args(sys.argv)[0]

    if (options.common_filename):
        options.molecule = options.common_filename + ".pdb"
        options.presampling = options.common_filename + ".trr"
        options.internals = options.common_filename + ".int"
        options.grompp = options.common_filename + ".mdp"
        options.topology = options.common_filename + ".top"
        options.index = options.common_filename + ".ndx"

    # NOTE(review): eval() only round-trips str(options) for pretty printing;
    # vars(options) would presumably do the same without eval - confirm
    # against the type returned by options_desc.parse_args.
    print("Options:\n%s\n" % pformat(eval(str(options))))

    assert (path.exists(options.molecule))
    assert (path.exists(options.presampling))
    assert (path.exists(options.internals))
    assert (path.exists(options.grompp))
    assert (path.exists(options.topology))

    #TODO: what if there is no index-file? (make_ndx)
    assert (path.exists(options.index))
    assert ('moi' in gromacs.read_index_file(
        options.index)), "group 'MOI' should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    # every run of digits in ref_t counts as one temperature definition
    temperatures = re.findall(r"[0-9]+", mdp_options["ref_t"])
    assert (len(set(temperatures)) == 1
            ), "temperature definition in mdp file is ambiguous"
    temperature = temperatures[0]

    # get sampling temperature from mdp file
    if (int(temperature) > 310):
        if not (userinput(
                "Your sampling temperature is set to %s K. Continue?" %
                temperature, "bool")):
            sys.exit("Quit by user.")

    # options we can fix
    mdp_options_dirty = False  #if set, a new mdp-file will be written

    # the value of the following options need to be fixed
    critical_mdp_options = {
        "dihre": "yes",
        "dihre_fc": "1",
        "disre": "simple",
        "disre_fc": "1",
        "gen_temp": temperature
    }
    for (k, v) in critical_mdp_options.items():
        if (k in mdp_options and mdp_options[k].strip() != v):
            print("Error. I do not want to use '%s' for option '%s' ('%s' required). Please fix your mdp file." % (
                mdp_options[k].strip(), k, v))
            sys.exit("Quitting.")
        else:
            # NOTE(review): this branch also runs when the option already has
            # the required value, so the dirty flag is always set once this
            # loop finishes without exiting.
            mdp_options[k] = v
            mdp_options_dirty = True

    # the value of the following options does not matter, but they should be there
    noncritical_mdp_options = {
        "tcoupl": "no",
        "pcoupl": "no",
        "gen_vel": "no",
        "gen_seed": "-1"
    }
    for (k, v) in noncritical_mdp_options.items():
        if k not in mdp_options:
            mdp_options[k] = v
            mdp_options_dirty = True

    # ask if energygrps is missing or does not list 'moi'
    if ("energygrps" not in mdp_options
            or "moi" not in re.findall(r'\S+', mdp_options["energygrps"])):
        if not (userinput(
                "'MOI' is not defined as an energy group in your mdp file. Maybe you have forgotten to define proper 'energygrps'. Continue?",
                "bool")):
            sys.exit("Quit by user.")

    # nstxout and nstenergy must agree; a missing one is copied from the other
    has_nstxout = "nstxout" in mdp_options
    has_nstenergy = "nstenergy" in mdp_options
    if (has_nstxout and not has_nstenergy):
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif (has_nstenergy and not has_nstxout):
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True
    elif (has_nstenergy and has_nstxout):
        assert (mdp_options["nstxout"] == mdp_options["nstenergy"]
                ), "nstxout should equal nstenergy"

    if (int(mdp_options["nsteps"]) > 1e6):
        msg = "Number of MD-steps?"
        mdp_options["nsteps"] = str(
            userinput(msg, "int", default=int(mdp_options["nsteps"])))

    # create a fixed mdp-file
    if (mdp_options_dirty):
        print("Creating copy of mdp-file and adding missing options.")
        out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        # mode "w" overwrites an existing file of that name
        with open(out_fn, "w") as f:
            f.write("; Generated by zgf_create_pool\n")
            for i in sorted(mdp_options.items()):
                f.write("%s = %s\n" % i)
            f.write("; EOF\n")
        options.grompp = out_fn

    # check if subsampling is reasonable
    if (os.path.getsize(options.presampling) > 100e6):  # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        try:
            dt = trr.first_frame.next().t - trr.first_frame.t
        finally:
            trr.close()
        print("Presampling timestep is %.2f ps" % dt)
        if (dt < 10):  # picoseconds
            #TODO: maybe calculate subsampling factor individually, or ask?
            msg = "Subsample presampling trajectory by a tenth?"
            if (userinput(msg, "bool")):
                out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
                cmd = [
                    "trjconv", "-f", options.presampling, "-o", out_fn,
                    "-skip", "10"
                ]
                check_call(cmd)
                options.presampling = out_fn

    # balance linears
    if (options.balance_linears):
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)
        new_coord_list = []
        for c in old_converter:
            if (not isinstance(c, LinearCoordinate)):
                new_coord_list.append(c)
                continue  # we do not work on other Coordinate-Types
            #TODO: is this a good way to determine new_weight and new_offset???
            new_weight = c.weight / sqrt(2 * frames.var().getcoord(c))
            new_offset = c.offset + frames.mean().getcoord(c)
            new_coord = LinearCoordinate(*c.atoms,
                                         label=c.label,
                                         weight=new_weight,
                                         offset=new_offset)
            new_coord_list.append(new_coord)
        new_converter = Converter(coord_list=new_coord_list)

        assert (old_converter.filename.endswith(".int"))
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: " + options.internals)
        with open(options.internals, "w") as f:
            f.write(new_converter.serialize())
        assert (len(Converter(options.internals)) == len(new_coord_list)
                )  #try parsing

    # Finally: Create root-node and pool
    pool = Pool()
    if (len(pool) != 0):
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = int(temperature)
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save()  # save pool for the first time...

    # ... then we can save the first node...
    node0 = Node()
    node0.state = "refined"
    node0.save()  # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
    os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

    pool.root_name = node0.name
    pool.save()  #... now we have to save the pool again.

    if (not path.exists("analysis")):
        os.mkdir("analysis")
def main():
    """
    Merge the per-run trr- and edr-files of all nodes in state 'merge-able'.

    The selected nodes are locked for the duration of the merge and unlocked
    again even if a merge step fails. Depending on options.trr and
    options.edr, the "run<N>" trajectory files of a node are concatenated
    with trjcat into node.trr_fn, and its "run<N>" energy files are given
    consecutive start times and concatenated with eneconv into
    "<node.dir>/ener.edr".

    @raise AssertionError: if the number of edr-files found does not match
        extensions_counter + 1 and extensions_max + 1, or if an eneconv call
        exits non-zero.
    @raise CalledProcessError: if trjcat exits non-zero (from check_call).
    """
    options = options_desc.parse_args(sys.argv)[0]
    pool = Pool()

    needy_nodes = pool.where("state == 'merge-able'").multilock()

    if (len(needy_nodes) == 0):
        return

    try:
        # find out about trr time step; it is read from the first matching
        # run-file of the first node only and reused for all nodes.
        # dt stays 0 if no such file is found.
        dt = 0
        nodeDir = needy_nodes[0].dir.split('/')[-1]
        trr_pattern = "^" + re.escape(nodeDir) + r".+run\d+\.trr$"
        for fn in os.listdir(needy_nodes[0].dir):
            if re.match(trr_pattern, fn):
                trr = TrrFile(needy_nodes[0].dir + "/" + fn)
                try:
                    dt = trr.first_frame.next().t - trr.first_frame.t
                finally:
                    trr.close()
                break

        # dt is sometimes noisy in the final digits (three digits is femtosecond step = enough)
        dt = np.around(dt, decimals=3)
        for n in needy_nodes:

            if (options.trr):
                # merge sampling trajectories
                trr_fns = sorted([
                    fn for fn in os.listdir(n.dir)
                    if re.match(r"[^#].+run\d+\.trr$", fn)
                ])
                cmd = ["trjcat", "-f"]
                cmd += trr_fns
                # trjcat runs inside n.dir, so the output path is given
                # relative to it (assumes n.dir lies two levels below the
                # directory n.trr_fn is relative to - TODO confirm)
                cmd += ["-o", "../../" + n.trr_fn, "-cat"]
                print("Calling: %s" % " ".join(cmd))
                check_call(cmd, cwd=n.dir)

            if (options.edr):
                # merge edr files
                # get list of edr-files
                edr_fnames = sorted([
                    n.dir + "/" + fn for fn in os.listdir(n.dir)
                    if re.match(r"[^#].+run\d+\.edr$", fn)
                ])
                assert (len(edr_fnames) == n.extensions_counter + 1)
                assert (len(edr_fnames) == n.extensions_max + 1)

                time_offset = n.sampling_length + dt

                for edr_fn in edr_fnames[1:]:
                    # adapt edr starting times; new start time goes via stdin
                    cmd = ["eneconv", "-f", edr_fn, "-o", edr_fn, "-settime"]
                    print("Calling: " + (" ".join(cmd)))
                    p = Popen(cmd, stdin=PIPE)
                    p.communicate(input=(str(time_offset) + "\n"))
                    assert (p.wait() == 0)

                    time_offset += n.extensions_length + dt

                # concatenate edr files with adapted starting times
                cmd = ["eneconv", "-f"] + edr_fnames + ["-o", n.dir + "/ener.edr"]
                print("Calling: " + (" ".join(cmd)))
                p = Popen(cmd)
                retcode = p.wait()
                assert (retcode == 0)
    finally:
        # never leave nodes locked behind, even when a merge step failed
        needy_nodes.unlock()