def write_dcd(in_name, out_name, remarks='testing', header=None):
    """Copy every frame of the DCD file ``in_name`` into ``out_name``.

    Parameters
    ----------
    in_name
        Path of the DCD file to read.
    out_name
        Path of the DCD file to create (opened with mode ``'w'``).
    remarks
        Currently unused by the body of this function.
    header
        Mapping expanded into keyword arguments for ``write_header``.
        When ``None`` the header of the input file is reused.
    """
    with DCDFile(in_name) as source, DCDFile(out_name, 'w') as sink:
        out_header = source.header if header is None else header
        sink.write_header(**out_header)
        for frame in source:
            sink.write(xyz=frame.xyz, box=frame.unitcell)
def test_written_remarks_property(remarks, tmpdir_factory):
    """Check that an arbitrary ``remarks`` string survives a write/read cycle.

    Property-based test: ``remarks`` is supplied by the test parametrisation
    and is written to the REMARKS field of a copy of the ``DCD`` file; the
    value read back from that copy must equal the value written.

    Parameters
    ----------
    remarks
        The string to store in the header's ``'remarks'`` entry.
    tmpdir_factory
        pytest fixture used to create a dedicated output directory.
    """
    # Read the reference header inside a context manager so the reader is
    # closed right away instead of being left open for the rest of the test.
    with DCDFile(DCD) as dcd:
        header = dcd.header

    # One directory per generated example, keyed on the object's id.
    dirname = str(id(remarks)) + "_"
    testfile = str(tmpdir_factory.mktemp(dirname).join('test.dcd'))

    header['remarks'] = remarks
    write_dcd(DCD, testfile, header=header)

    expected_remarks = remarks
    with DCDFile(testfile) as f:
        assert f.header['remarks'] == expected_remarks
def test_write_random_unitcell(tmpdir):
    """Unit cells written alongside each frame must be read back unchanged.

    Frames of ``DCD`` are copied into a new file, each paired with a
    pseudo-random six-value box; the boxes read from the new file are then
    compared with the ones that were written.
    """
    out_path = str(tmpdir.join('test.dcd'))

    # A fixed seed keeps the generated boxes identical between runs.
    rng = np.random.RandomState(1178083)
    boxes = rng.uniform(high=80, size=(98, 6)).astype(np.float64)

    with DCDFile(DCD) as source, DCDFile(out_path, 'w') as sink:
        hdr = source.header
        hdr['is_periodic'] = True
        sink.write_header(**hdr)
        for i, frame in enumerate(source):
            sink.write(xyz=frame.xyz, box=boxes[i])

    with DCDFile(out_path) as written:
        for i, frame in enumerate(written):
            assert_array_almost_equal(frame.unitcell, boxes[i])
def test_read_coordsshape(dcdfile, natoms):
    """The coordinates returned by ``read()`` must have shape ``(natoms, 3)``.

    The expected atom count comes from the previous MDAnalysis
    implementation of DCD file handling.
    """
    with DCDFile(dcdfile) as dcd:
        frame = dcd.read()

    coords = frame[0]
    expected_shape = (natoms, 3)
    assert coords.shape == expected_shape
def written_dcd(tmpdir_factory):
    """Write a copy of ``DCD`` to a temporary file and describe the result.

    Returns
    -------
    Result
        Named tuple with ``testfile`` (path of the copy), ``header`` (header
        read from ``DCD``) and ``orgfile`` (``DCD`` itself).
    """
    with DCDFile(DCD) as dcd:
        header = dcd.header

    testfile = str(tmpdir_factory.mktemp('dcd').join('test.dcd'))
    write_dcd(DCD, testfile)

    Result = namedtuple("Result", "testfile, header, orgfile")
    return Result(testfile=testfile, header=header, orgfile=DCD)
def test_readframes(dcdfile, legacy_data, frame_idx):
    """Coordinates from ``readframes()`` must match the stored legacy data.

    ``legacy_data`` is loaded with ``np.load``; its i-th entry is compared
    with the frame whose number is the i-th entry of ``frame_idx``.
    """
    reference = np.load(legacy_data)
    with DCDFile(dcdfile) as dcd:
        coords = dcd.readframes().xyz
        # one coordinate set per frame in the file
        assert_equal(len(coords), len(dcd))
        for ref_row, frame_num in enumerate(frame_idx):
            assert_array_almost_equal(coords[frame_num], reference[ref_row])
def test_nframessize_int(dcdfile):
    """The bytes after the header and first frame must be whole frames.

    ``nframessize / framesize + 1`` is the total number of frames and must
    be an integer, so the remainder of that division has to be zero.
    """
    file_bytes = os.path.getsize(dcdfile)
    with DCDFile(dcdfile) as dcd:
        remaining_bytes = file_bytes - dcd._header_size - dcd._firstframesize
        leftover = float(remaining_bytes) % float(dcd._framesize)
        assert leftover == 0
def test_relative_frame_sizes(dcdfile):
    """The first frame must be at least as large as a general frame.

    The first frame of a DCD file holds the same atoms as later frames plus
    the (optional) fixed atoms, so it can never be the smaller of the two.
    """
    with DCDFile(dcdfile) as dcd:
        first_size, general_size = dcd._firstframesize, dcd._framesize

    assert first_size >= general_size
def written_dcd(tmpdir_factory):
    """Write a copy of ``DCD`` to a temporary file and describe the result.

    Returns
    -------
    Result
        Named tuple with ``testfile`` (path of the copy), ``header`` (header
        read from ``DCD``, with the last character of its remarks removed)
        and ``orgfile`` (``DCD`` itself).
    """
    with DCDFile(DCD) as dcd:
        header = dcd.header

    testfile = str(tmpdir_factory.mktemp('dcd').join('test.dcd'))
    write_dcd(DCD, testfile)

    # The last character of the remarks is not saved because of null
    # termination, so drop it from the expected header as well.
    header['remarks'] = header['remarks'][:-1]

    Result = namedtuple("Result", "testfile, header, orgfile")
    return Result(testfile=testfile, header=header, orgfile=DCD)
def test_file_size_breakdown(dcdfile):
    """File size must equal header + first frame + (N - 1) general frames."""
    expected = os.path.getsize(dcdfile)
    with DCDFile(dcdfile) as dcd:
        leading_bytes = dcd._header_size + dcd._firstframesize
        trailing_bytes = (dcd.n_frames - 1) * dcd._framesize
        actual = leading_bytes + trailing_bytes

    assert actual == expected
def test_written_remarks_property(remarks, tmpdir, dcd):
    """Check that an arbitrary ``remarks`` string survives a write/read cycle.

    Property-based test: the header of the ``dcd`` fixture is reused with
    its ``'remarks'`` entry replaced, written to a copy of ``DCD``, and the
    REMARKS value read back from that copy must equal the one written.
    """
    out_path = str(tmpdir.join('test.dcd'))

    hdr = dcd.header
    hdr['remarks'] = remarks
    write_dcd(DCD, out_path, header=hdr)

    with DCDFile(out_path) as written:
        assert written.header['remarks'] == remarks
def test_read_coord_values(dcdfile, legacy_data, frames):
    """Coordinates read after seeking must equal the stored legacy values.

    To limit the size of the stored reference data, only the frames listed
    in ``frames`` are compared; the i-th reference entry belongs to the
    i-th frame number in ``frames``.
    """
    reference = np.load(legacy_data)
    with DCDFile(dcdfile) as dcd:
        for row, frame_num in enumerate(frames):
            dcd.seek(frame_num)
            assert_array_equal(dcd.read()[0], reference[row])
def test_write_header(tmpdir):
    """``write_header()`` must produce a readable header for a new file."""
    out_path = str(tmpdir.join('test.dcd'))
    written = dict(remarks='Crazy!', natoms=22, istart=12, nsavc=10,
                   delta=0.02, is_periodic=1)

    with DCDFile(out_path, 'w') as dcd:
        dcd.write_header(**written)

    with DCDFile(out_path) as dcd:
        header = dcd.header

    # exact comparison for the string / integer fields ...
    for key in ('remarks', 'natoms', 'istart', 'is_periodic', 'nsavc'):
        assert header[key] == written[key]
    # ... and a tolerance-based one for the floating point time step
    assert np.allclose(header['delta'], .02)
def test_write_all_dtypes(tmpdir, dtype):
    """Writing a frame whose arrays use ``dtype`` must not raise."""
    natoms = 10
    out_path = str(tmpdir.join('foo.dcd'))
    with DCDFile(out_path, 'w') as out:
        coords = np.ones((natoms, 3), dtype=dtype)
        cell = np.ones(6, dtype=dtype)
        out.write_header(remarks='test', natoms=natoms, is_periodic=1,
                         delta=1, nsavc=1, istart=1)
        out.write(xyz=coords, box=cell)
def test_write_array_like(tmpdir, array_like):
    """Writing a frame built with ``array_like`` containers must not raise.

    ``array_like`` is called on plain nested lists to build both the
    coordinates and the box passed to ``write``.
    """
    natoms = 10
    out_path = str(tmpdir.join('foo.dcd'))
    with DCDFile(out_path, 'w') as out:
        coords = array_like([[1, 1, 1] for _ in range(natoms)])
        cell = array_like(list(range(6)))
        out.write_header(remarks='test', natoms=natoms, is_periodic=1,
                         delta=1, nsavc=1, istart=1)
        out.write(xyz=coords, box=cell)
def test_write_wrong_shape_box(tmpdir):
    """A box with seven values must be rejected with ``ValueError``."""
    natoms = 10
    out_path = str(tmpdir.join('foo.dcd'))
    with DCDFile(out_path, 'w') as out:
        coords = np.ones((natoms, 3))
        bad_box = np.ones(7)
        out.write_header(remarks='test', natoms=natoms, is_periodic=1,
                         delta=1, nsavc=1, istart=1)
        with pytest.raises(ValueError):
            out.write(xyz=coords, box=bad_box)
def test_write_header(tmpdir):
    """``write_header()`` must produce a readable, correctly laid out header.

    The header is checked twice: once through ``DCDFile.header`` and once by
    unpacking integers straight from the bytes of the written file.
    """
    out_path = str(tmpdir.join('test.dcd'))

    with DCDFile(out_path, 'w') as dcd:
        dcd.write_header(remarks='Crazy!', natoms=22, istart=12, nsavc=10,
                         delta=0.02, is_periodic=1)

    with DCDFile(out_path) as dcd:
        header = dcd.header
    assert header['remarks'] == 'Crazy!'
    assert header['natoms'] == 22
    assert header['istart'] == 12
    assert header['is_periodic'] == 1
    assert header['nsavc'] == 10
    assert np.allclose(header['delta'], .02)

    # Now inspect the raw bytes that were written.
    with open(out_path, 'rb') as fh:
        header_bytes = fh.read()

    def int_at(offset):
        # native int stored in the four bytes starting at ``offset``
        return struct.unpack('i', header_bytes[offset:offset + 4])[0]

    # magic number at the very start of the file
    assert int_at(0) == 84
    # the magic number is written again before the remark section
    assert int_at(88) == 84
    # length of the remark section; hard coded to 244 right now
    assert int_at(92) == 244
    # the section announces 3 blocks of length 80
    assert int_at(96) == 3
    # after the remark section its length is reported again
    assert int_at(340) == 244
    # apparently another magic number
    assert int_at(344) == 4
def test_write_header_twice(tmpdir):
    """Writing the header a second time must raise ``IOError``."""
    header = dict(remarks='Crazy!', natoms=22, istart=12, nsavc=10,
                  delta=0.02, is_periodic=1)
    out_path = str(tmpdir.join('test.dcd'))

    with DCDFile(out_path, 'w') as dcd:
        dcd.write_header(**header)
        # the duplicate attempt is the one expected to fail
        with pytest.raises(IOError):
            dcd.write_header(**header)
def aa_fetch():
    """Download the AA trajectory if needed and convert it to a TFRecord file.

    Works in the current directory with two fixed file names:
    ``"aa0.dcd"`` (the downloaded trajectory) and ``"aa.tfrecords"`` (the
    GZIP-compressed record file). Each frame's coordinates are flattened and
    stored as a float list under the feature key ``"coords"``.

    An existing record file is returned as is, and an existing trajectory
    file is not downloaded again. Both files are first written under a
    ``.part`` name and renamed only once complete, so an interrupted
    download or conversion is not mistaken for a finished one on the next
    call.

    Returns
    -------
    str
        Path of the record file.

    Raises
    ------
    ImportError
        If ``MDAnalysis.lib.formats.libdcd`` cannot be imported; the
        original import error is attached as the cause.
    """
    raw_filepath = "aa0.dcd"
    record_file = "aa.tfrecords"

    if os.path.isfile(record_file):
        print("Found existing record file, delete if you want to re-fetch")
        return record_file

    if os.path.isfile(raw_filepath):
        print(
            f"Found downloaded file {raw_filepath}, delete if you want to redownload"
        )
    else:
        print("Downloading AA data...", end="")
        partial_download = raw_filepath + ".part"
        try:
            urllib.request.urlretrieve(
                "https://ndownloader.figshare.com/files/1497002",
                partial_download)
            os.replace(partial_download, raw_filepath)
        finally:
            # only still present when the download did not complete
            if os.path.isfile(partial_download):
                os.remove(partial_download)
        print("File downloaded")

    print("Converting...")
    try:
        from MDAnalysis.lib.formats.libdcd import DCDFile
    except ImportError as err:
        raise ImportError("Please install MDanalysis with pip first") from err

    partial_record = record_file + ".part"
    try:
        with tf.io.TFRecordWriter(
                partial_record,
                options=tf.io.TFRecordOptions(compression_type="GZIP")
        ) as writer, DCDFile(raw_filepath) as dcd:
            for frame in dcd:
                feature = {
                    "coords":
                    tf.train.Feature(float_list=tf.train.FloatList(
                        value=frame.xyz.flatten()))
                }
                example = tf.train.Example(features=tf.train.Features(
                    feature=feature))
                writer.write(example.SerializeToString())
        os.replace(partial_record, record_file)
    finally:
        # only still present when the conversion did not complete
        if os.path.isfile(partial_record):
            os.remove(partial_record)
    return record_file
def dcd():
    """Yield a ``DCDFile`` opened on ``DCD``.

    The file is opened through a context manager, so it is closed once the
    generator is resumed after the ``yield``.
    """
    with DCDFile(DCD) as reader:
        yield reader
def test_written_coord_match(written_dcd):
    """Coordinates in the written copy must match the original file."""
    with DCDFile(written_dcd.testfile) as written, \
            DCDFile(written_dcd.orgfile) as reference:
        # frames are compared pairwise, in file order
        for new_frame, ref_frame in zip(written, reference):
            assert_array_almost_equal(new_frame.xyz, ref_frame.xyz)
def test_read_unit_cell(dcdfile, unit_cell):
    """The unit cell returned by ``read()`` must match the expected values.

    The expected values come from the previous MDAnalysis implementation of
    DCD file handling.
    """
    with DCDFile(dcdfile) as dcd:
        frame = dcd.read()

    assert_array_almost_equal(frame.unitcell, unit_cell)
def test_seek_over_max():
    """Seeking to frame 102 of ``DCD`` must raise ``EOFError``."""
    with DCDFile(DCD) as dcd, pytest.raises(EOFError):
        dcd.seek(102)
def test_written_header(written_dcd):
    """The header of the written copy must equal the expected header."""
    expected = written_dcd.header
    with DCDFile(written_dcd.testfile) as dcd:
        found = dcd.header

    assert found == expected
def test_write_no_header(tmpdir):
    """Writing a frame before any header must raise ``IOError``."""
    out_path = str(tmpdir.join('test.dcd'))
    with DCDFile(out_path, 'w') as dcd, pytest.raises(IOError):
        dcd.write(np.ones(3), np.ones(6))
def test_written_num_frames(written_dcd):
    """The written copy must report the same length as the original file."""
    with DCDFile(written_dcd.testfile) as written, \
            DCDFile(written_dcd.orgfile) as reference:
        assert len(written) == len(reference)
def test_written_dcd_coordinate_data_shape(written_dcd):
    """Each written frame must keep the coordinate shape of its original."""
    with DCDFile(written_dcd.testfile) as written, \
            DCDFile(written_dcd.orgfile) as reference:
        # frames are compared pairwise, in file order
        for new_frame, ref_frame in zip(written, reference):
            assert new_frame.xyz.shape == ref_frame.xyz.shape
def test_written_seek(written_dcd):
    """After ``seek(40)`` on the written copy, ``tell()`` must report 40."""
    target_frame = 40
    with DCDFile(written_dcd.testfile) as f:
        f.seek(target_frame)
        assert_equal(f.tell(), target_frame)
def test_is_periodic(dcdfile, is_periodic):
    """``DCDFile.is_periodic`` must equal the expected flag for the file."""
    with DCDFile(dcdfile) as f:
        observed = f.is_periodic

    assert observed == is_periodic
def test_written_unit_cell(written_dcd):
    """Unit cells in the written copy must match the original file."""
    with DCDFile(written_dcd.testfile) as written, \
            DCDFile(written_dcd.orgfile) as reference:
        # frames are compared pairwise, in file order
        for new_frame, ref_frame in zip(written, reference):
            assert_array_almost_equal(new_frame.unitcell, ref_frame.unitcell)