Example No. 1
def gmx_benzene_vdw_dHdl():
    dataset = alchemtest.gmx.load_benzene()

    dHdl = pd.concat([gmx.extract_dHdl(filename, T=300)
                      for filename in dataset['data']['VDW']])

    return dHdl
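A combined dHdl frame like this is normally handed to a thermodynamic-integration estimator. A minimal downstream sketch, assuming alchemlyb's TI estimator and alchemlyb.concat (both used in later examples here); the print line is illustrative only:

import alchemlyb
import alchemtest.gmx
from alchemlyb.parsing import gmx
from alchemlyb.estimators import TI

dataset = alchemtest.gmx.load_benzene()
# concatenate the per-lambda dHdl frames, then fit thermodynamic integration
dHdl = alchemlyb.concat([gmx.extract_dHdl(filename, T=300)
                         for filename in dataset['data']['VDW']])
ti = TI().fit(dHdl)
print(ti.delta_f_)  # free-energy differences between lambda states (in kT)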
Example No. 2
def test_nounit():
    '''Test that a missing energy_unit attribute raises a TypeError'''
    dataset = load_benzene()
    dhdl = extract_dHdl(dataset['data']['Coulomb'][0], 310)
    dhdl.attrs.pop('energy_unit', None)
    with pytest.raises(TypeError):
        to_kT(dhdl)
Example No. 3
def test_noT():
    '''Test that a missing temperature attribute raises a TypeError'''
    dataset = load_benzene()
    dhdl = extract_dHdl(dataset['data']['Coulomb'][0], 310)
    dhdl.attrs.pop('temperature', None)
    with pytest.raises(TypeError):
        to_kT(dhdl)
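The two tests above each remove an attribute that extract_dHdl normally sets. For contrast, a minimal sketch of the successful path, assuming alchemlyb's to_kT postprocessor in alchemlyb.postprocessors.units:

from alchemtest.gmx import load_benzene
from alchemlyb.parsing.gmx import extract_dHdl
from alchemlyb.postprocessors.units import to_kT

dataset = load_benzene()
dhdl = extract_dHdl(dataset['data']['Coulomb'][0], T=310)
# extract_dHdl records the metadata that to_kT relies on
assert dhdl.attrs['temperature'] == 310
assert dhdl.attrs['energy_unit'] == 'kT'
converted = to_kT(dhdl)  # no TypeError: both attrs are present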
Example No. 4
def gmx_water_particle_without_energy_dHdl():
    dataset = alchemtest.gmx.load_water_particle_without_energy()

    dHdl = alchemlyb.concat([gmx.extract_dHdl(filename, T=300)
                             for filename in dataset['data']['AllStates']])

    return dHdl
Example No. 5
def gmx_expanded_ensemble_case_3_dHdl():
    dataset = alchemtest.gmx.load_expanded_ensemble_case_3()

    dHdl = pd.concat([gmx.extract_dHdl(filename, T=300)
                      for filename in dataset['data']['AllStates']])

    return dHdl
Example No. 6
 def test_equilibrium_detection(self, dhdl):
     '''Test that equilibrium_detection preserves the attrs correctly'''
     dataset = load_benzene()
     dhdl = extract_dHdl(dataset['data']['Coulomb'][0], 310)
     new_dhdl = equilibrium_detection(dhdl)
     assert new_dhdl.attrs['temperature'] == 310
     assert new_dhdl.attrs['energy_unit'] == 'kT'
Example No. 7
def gmx_benzene_coul_dHdl():
    dataset = alchemtest.gmx.load_benzene()

    dHdl = alchemlyb.concat([gmx.extract_dHdl(filename, T=300)
                             for filename in dataset['data']['Coulomb']])

    return dHdl
Example No. 8
def gmx_expanded_ensemble_case_2_dHdl():
    dataset = alchemtest.gmx.load_expanded_ensemble_case_2()

    dHdl = alchemlyb.concat([gmx.extract_dHdl(filename, T=300, filter=False)
                             for filename in dataset['data']['AllStates']])

    return dHdl
Example No. 9
 def test_sanity(self, data, tmp_path):
     '''Test if the test routine is working.'''
     text, length = data
     new_text = tmp_path / 'text.xvg'
     new_text.write_text(text)
     dhdl = extract_dHdl(new_text, 310)
     assert len(dhdl) == length
Example No. 10
 def test_statistical_inefficiency(self, dhdl):
     '''Test that statistical_inefficiency preserves the attrs correctly'''
     dataset = load_benzene()
     dhdl = extract_dHdl(dataset['data']['Coulomb'][0], 310)
     new_dhdl = statistical_inefficiency(dhdl)
     assert new_dhdl.attrs['temperature'] == 310
     assert new_dhdl.attrs['energy_unit'] == 'kT'
Example No. 11
 def test_truncated_row(self, data, tmp_path):
     '''Test the case where the last row has been truncated.'''
     text, length = data
     new_text = tmp_path / 'text.xvg'
     new_text.write_text(text + '40010.0 27.0\n')
     dhdl = extract_dHdl(new_text, 310, filter=True)
     assert len(dhdl) == length
Example No. 12
def test_plot_dF_state():
    '''Just test if the plot runs'''
    bz = load_benzene().data
    u_nk_coul = pd.concat([extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']])
    dHdl_coul = pd.concat([extract_dHdl(xvg, T=300) for xvg in bz['Coulomb']])
    u_nk_vdw = pd.concat([extract_u_nk(xvg, T=300) for xvg in bz['VDW']])
    dHdl_vdw = pd.concat([extract_dHdl(xvg, T=300) for xvg in bz['VDW']])

    ti_coul = TI().fit(dHdl_coul)
    ti_vdw = TI().fit(dHdl_vdw)
    bar_coul = BAR().fit(u_nk_coul)
    bar_vdw = BAR().fit(u_nk_vdw)
    mbar_coul = MBAR().fit(u_nk_coul)
    mbar_vdw = MBAR().fit(u_nk_vdw)

    dhdl_data = [
        (ti_coul, ti_vdw),
        (bar_coul, bar_vdw),
        (mbar_coul, mbar_vdw),
    ]
    fig = plot_dF_state(dhdl_data, orientation='portrait')
    assert isinstance(fig, matplotlib.figure.Figure)
    fig = plot_dF_state(dhdl_data, orientation='landscape')
    assert isinstance(fig, matplotlib.figure.Figure)
    fig = plot_dF_state(dhdl_data, labels=['MBAR', 'TI', 'BAR'])
    assert isinstance(fig, matplotlib.figure.Figure)
    with pytest.raises(ValueError):
        fig = plot_dF_state(dhdl_data, labels=[
            'MBAR',
            'TI',
        ])
    fig = plot_dF_state(dhdl_data, colors=['#C45AEC', '#33CC33', '#F87431'])
    assert isinstance(fig, matplotlib.figure.Figure)
    with pytest.raises(ValueError):
        fig = plot_dF_state(dhdl_data, colors=['#C45AEC', '#33CC33'])
    with pytest.raises(NameError):
        fig = plot_dF_state(dhdl_data, orientation='xxx')
    fig = plot_dF_state(ti_coul, orientation='landscape')
    assert isinstance(fig, matplotlib.figure.Figure)
    fig = plot_dF_state(ti_coul, orientation='portrait')
    assert isinstance(fig, matplotlib.figure.Figure)
    fig = plot_dF_state([ti_coul, bar_coul])
    assert isinstance(fig, matplotlib.figure.Figure)
    fig = plot_dF_state([(ti_coul, ti_vdw)])
    assert isinstance(fig, matplotlib.figure.Figure)
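A short usage sketch following on from the test above; it reuses ti_coul and ti_vdw from that example and relies only on matplotlib's standard Figure.savefig:

fig = plot_dF_state([(ti_coul, ti_vdw)], orientation='portrait')
fig.savefig('dF_state.png', bbox_inches='tight')  # plot_dF_state returns a matplotlib Figure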
Example No. 13
def gmx_water_particle_with_total_energy_dHdl():
    dataset = alchemtest.gmx.load_water_particle_with_total_energy()

    dHdl = [
        gmx.extract_dHdl(filename, T=300)
        for filename in dataset['data']['AllStates']
    ]

    return dHdl
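Unlike the concatenated fixtures above, this one returns one dHdl frame per state, which is the shape needed for per-state decorrelation. A minimal sketch of that use, assuming alchemlyb.preprocessing.statistical_inefficiency (exercised in a later example here):

import alchemlyb
import alchemtest.gmx
from alchemlyb.parsing import gmx
from alchemlyb.preprocessing import statistical_inefficiency

dataset = alchemtest.gmx.load_water_particle_with_total_energy()
dHdl_list = [gmx.extract_dHdl(filename, T=300)
             for filename in dataset['data']['AllStates']]
# decorrelate each state on its own dH/dl time series, then concatenate
dHdl_subsampled = alchemlyb.concat([
    statistical_inefficiency(frame, series=frame.iloc[:, 0])
    for frame in dHdl_list
])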
Example No. 14
 def test_too_many_cols(self, data, tmp_path):
     '''Test the case where the row has too many columns.'''
     text, length = data
     new_text = tmp_path / 'text.xvg'
     new_text.write_text(
         text +
         '40010.0 27.0 0.0 6.7 13.5 20.2 27.0 0.7 27.0 0.0 6.7 13.5 20.2 27.0 0.7\n'
     )
     dhdl = extract_dHdl(new_text, 310, filter=True)
     assert len(dhdl) == length
Example No. 15
    def estimaters():
        bz = load_benzene().data
        dHdl_coul = alchemlyb.concat(
            [extract_dHdl(xvg, T=300) for xvg in bz['Coulomb']])
        ti = TI().fit(dHdl_coul)

        u_nk_coul = alchemlyb.concat(
            [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']])
        mbar = MBAR().fit(u_nk_coul)

        return ti, mbar
Example No. 16
 def test_weirdnumber(self, data, tmp_path):
     '''Test the case where the appended row contains a malformed number.'''
     text, length = data
     new_text = tmp_path / 'text.xvg'
     # Note the 27.040010.0, which is 27.0 and 40010.0 concatenated together
     new_text.write_text(
         text +
         '40010.0 27.040010.0 27.0 0.0 6.7 13.5 20.2 27.0 0.7 27.0 0.0 6.7 '
         '13.5 20.2 27.0 0.7\n')
     dhdl = extract_dHdl(new_text, 310, filter=True)
     assert len(dhdl) == length
Example No. 17
def test_dHdl():
    """Test that dHdl has the correct form when extracted from files.

    """
    dataset = load_benzene()

    for leg in dataset['data']:
        for filename in dataset['data'][leg]:
            dHdl = extract_dHdl(filename, T=300)

            assert dHdl.index.names == ['time', 'fep-lambda']
            assert dHdl.shape == (4001, 1)
Example No. 18
def test_plot_ti_dhdl():
    '''Just test if the plot runs'''
    bz = load_benzene().data
    dHdl_coul = pd.concat([extract_dHdl(xvg, T=300) for xvg in bz['Coulomb']])
    ti_coul = TI()
    ti_coul.fit(dHdl_coul)
    assert isinstance(plot_ti_dhdl(ti_coul), matplotlib.axes.Axes)
    fig, ax = plt.subplots(figsize=(8, 6))
    assert isinstance(plot_ti_dhdl(ti_coul, ax=ax), matplotlib.axes.Axes)
    assert isinstance(plot_ti_dhdl(ti_coul, labels=['Coul']),
                      matplotlib.axes.Axes)
    assert isinstance(plot_ti_dhdl(ti_coul, labels=['Coul'], colors=['r']),
                      matplotlib.axes.Axes)
    dHdl_vdw = pd.concat([extract_dHdl(xvg, T=300) for xvg in bz['VDW']])
    ti_vdw = TI().fit(dHdl_vdw)
    assert isinstance(plot_ti_dhdl([ti_coul, ti_vdw]), matplotlib.axes.Axes)
    ti_coul.dhdl = pd.DataFrame.from_dict(
        {'fep': range(100)},
        orient='index',
        columns=np.arange(100) / 100).T
    assert isinstance(plot_ti_dhdl(ti_coul), matplotlib.axes.Axes)
Example No. 19
def test_dHdl_case1():
    """Test that dHdl has the correct form when extracted from expanded ensemble files (case 1).

    """
    dataset = load_expanded_ensemble_case_1()

    for leg in dataset['data']:
        for filename in dataset['data'][leg]:
            dHdl = extract_dHdl(filename, T=300, filter=False)

            assert dHdl.index.names == [
                'time', 'fep-lambda', 'coul-lambda', 'vdw-lambda',
                'restraint-lambda'
            ]
            assert dHdl.shape == (50001, 4)
Example No. 20
def get_dHdl_XVG_delayed(xvg):
    # TODO
    # apply extract_dHdl_updated3
    # merge get_header, extract_state
    # cython for this?
    # don't forget cache read by bytesio
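    # NOTE: 'bread', 'T', and the 'xvg' object with an .abspath attribute are
    # expected to be provided by the enclosing module; they are not defined here.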
    fsize = os.path.getsize(xvg.abspath)
    bufsize = 8192
    stopat = fsize / bufsize / 2

    s0 = time.time()
    bread(xvg.abspath, bsize=bufsize, stopat=stopat)
    s1 = time.time()
    msg = ("{},{},{},{},{},{}".format('get_dHdl_XVG_delayed', 'bread',
        xvg.abspath, s1-s0, s1, s0))
    #print(msg)
    logging.info(msg)
    dHdl = extract_dHdl(xvg.abspath, T=T)
    s2 = time.time()
    msg = ("{},{},{},{},{},{}".format('get_dHdl_XVG_delayed',
        '_extract_dHdl', xvg.abspath, s2-s1, s1, s0))
    #print(msg)
    logging.info(msg)
    return dHdl
Example No. 21
def test_extract_dHdl_unit():
    '''Test that extract_dHdl assigns the attrs correctly'''
    dataset = load_benzene()
    dhdl = extract_dHdl(dataset['data']['Coulomb'][0], 310)
    assert dhdl.attrs['temperature'] == 310
    assert dhdl.attrs['energy_unit'] == 'kT'
Example No. 22
 def dhdl():
     data = load_ABFE()['data']['complex']
     dhdl = alchemlyb.concat(
         [extract_dHdl(data[i], 300) for i in range(30)])
     return dhdl
Example No. 23
 def dhdl():
     bz = load_benzene().data
     dHdl_coul = alchemlyb.concat(
         [extract_dHdl(xvg, T=300) for xvg in bz['Coulomb']])
     return dHdl_coul
Example No. 24
 def data():
     dhdl = extract_dHdl(load_benzene()['data']['Coulomb'][0], 310)
     with bz2.open(load_benzene()['data']['Coulomb'][0], "rt") as bz_file:
         text = bz_file.read()
     return text, len(dhdl)
Example No. 25
def gmx_benzene():
    dataset = load_benzene()
    return [gmx.extract_dHdl(dhdl, T=300) for dhdl in dataset['data']['Coulomb']], \
           [gmx.extract_u_nk(dhdl, T=300) for dhdl in dataset['data']['Coulomb']]
Example No. 26
def gmx_ABFE_dhdl():
    dataset = alchemtest.gmx.load_ABFE()
    return gmx.extract_dHdl(dataset['data']['complex'][0], T=300)
Example No. 27
def gmx_benzene_dHdl():
    dataset = alchemtest.gmx.load_benzene()
    return gmx.extract_dHdl(dataset['data']['Coulomb'][0], T=300)
Example No. 28
 def dhdl():
     dataset = load_benzene()
     dhdl = extract_dHdl(dataset['data']['Coulomb'][0], 310)
     return dhdl
Example No. 29
def gmx_benzene_dHdl_full():
    dataset = alchemtest.gmx.load_benzene()
    return pd.concat(
        [gmx.extract_dHdl(i, T=300) for i in dataset['data']['Coulomb']])
Example No. 30
    def extract_data(self, dir, temp, dt):
        # extract and subsample dHdl using equilibrium_detection
        dHdl_state = []  # dHdl_state is for collecting data for a single state
        u_nk_state = []  # u_nk_state is for collecting data for a single state

        if os.path.isfile('temporary.xvg'):
            os.system("rm temporary.xvg")
        files = glob.glob(os.path.join(dir, '*dhdl.xvg*'))
        files = natsort.natsorted(files, reverse=False)

        file_idx = -1
        n = 0  # counter for the number of files of a specific state
        self.n_state = 0  # counter for the number of states

        for i in track(files):
            n += 1
            file_idx += 1
            logger(f"Parsing {files[file_idx]} and collecting data ...")
            os.system(f"head -n-1 {i} > temporary.xvg"
                      )  # delete the last line in case it is incomplete
            dHdl_state.append(extract_dHdl('temporary.xvg', T=temp))
            u_nk_state.append(extract_u_nk('temporary.xvg', T=temp))

            if n > 1:  # discard the overlapping time frames of the previous file
                # the last time frame of file n
                upper_t = dHdl_state[-2].iloc[-1].name[0]
                # the first time frame of file n + 1
                lower_t = dHdl_state[-1].iloc[0].name[0]
                # upper_t and lower_t should be the same for both dHdl and u_nk

                if lower_t != 0:  # in case file n + 1 is the first file of the next replica
                    # number of data frames to discard in file n
                    n_discard = int((upper_t - lower_t) / dt + 1)
                    dHdl_state[-2] = dHdl_state[-2].iloc[:-n_discard]
                    u_nk_state[-2] = u_nk_state[-2].iloc[:-n_discard]
                else:  # lower_t == 0 means that we have gathered dHdl for the previous state
                    self.n_state += 1
                    dHdl_data = pd.concat(dHdl_state[:-1])
                    u_nk_data = pd.concat(u_nk_state[:-1])

                    dHdl.append(
                        equilibrium_detection(dHdl_data, dHdl_data.iloc[:, 0]))
                    dHdl_state = [dHdl_state[-1]]
                    logger(
                        f'Subsampling dHdl data of the {ordinal(self.n_state)} state ...'
                    )

                    u_nk.append(
                        equilibrium_detection(u_nk_data, u_nk_data.iloc[:, 0]))
                    u_nk_state = [u_nk_state[-1]]
                    logger(
                        f'Subsampling u_nk data of the {ordinal(self.n_state)} state ...'
                    )

                    n = 1  # now there is only one file loaded in dHdl_state/u_nk_state

        # dealing with the last state with equilibrium_detection
        self.n_state += 1
        dHdl_data = pd.concat(dHdl_state)
        u_nk_data = pd.concat(u_nk_state)

        return dHdl_data, u_nk_data
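The method above references several names that are not defined in the snippet (dHdl, u_nk, logger, ordinal, track). A hypothetical sketch of the module-level context it appears to assume; every name below is reconstructed from the calls in the snippet, not from a published API:

import glob
import os

import natsort
import pandas as pd
from rich.progress import track  # assumption: 'track' is rich's progress helper
from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk
from alchemlyb.preprocessing import equilibrium_detection

dHdl, u_nk = [], []  # module-level lists that extract_data appends to


def logger(msg):
    # assumption: a thin wrapper that prints and/or logs the message
    print(msg)


def ordinal(n):
    # assumption: small helper used only in log messages ('1st', '2nd', '3rd', '4th', ...)
    suffix = 'th' if n % 100 in (11, 12, 13) else {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return f'{n}{suffix}'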