Esempio n. 1
0
    def test_read_write_tables(self, tmp_path):
        """Round-trip DataFrames through StringIO and files for single- and
        multi-index columns; verify metadata survival and pprint output."""
        # Single-index columns
        df = pd.DataFrame(np.random.randn(25, 4), columns=list('ABCD'))
        df.index.name = 'singlecolumnindex'

        # StringIO round trip
        sio = StringIO()
        dataframe_to_stringio(df, sio)
        sio.seek(0)
        df_read = csv_to_dataframe(sio)
        pd.testing.assert_frame_equal(df, df_read)

        # File round trip
        fpath = Path(tmp_path) / 'single_index.csv'
        dataframe_to_file(fpath, df)
        # Bug fix: the return value was previously discarded, so the assert
        # below re-tested the StringIO result instead of the file round trip.
        df_read = csv_to_dataframe(fpath)
        pd.testing.assert_frame_equal(df, df_read)

        # multi-index column
        cols = pd.MultiIndex.from_product([('a', 'b'), ('x', 'y')])
        df = pd.DataFrame(np.random.randn(25, 4), columns=cols)
        df.index.name = 'multicolumnindex'

        sio = StringIO()
        dataframe_to_stringio(df, sio)
        sio.seek(0)
        df_read = csv_to_dataframe(sio)
        pd.testing.assert_frame_equal(df, df_read)

        fpath = Path(tmp_path) / 'multi_index.csv'
        dataframe_to_file(fpath, df)
        df_read = csv_to_dataframe(fpath)
        pd.testing.assert_frame_equal(df, df_read)

        # The same csv must also load as a Protein object
        protein = csv_to_protein(fpath)
        assert protein.index.name == 'r_number'
        assert isinstance(protein, Protein)

        # NOTE(review): 'instrumuent' is a typo but harmless here — the dict
        # is only compared against itself after the round trip.
        metadata = {
            'instrumuent': 'LCMS',
            'settings': {'pressure': '5 kPa', 'temperature': '400K'}
        }

        df.attrs['metadata'] = metadata

        # Metadata attached via DataFrame.attrs must survive a file round trip
        fpath = Path(tmp_path) / 'multi_index_with_metadata.csv'
        dataframe_to_file(fpath, df)
        df_read = csv_to_dataframe(fpath)
        pd.testing.assert_frame_equal(df, df_read)

        assert df_read.attrs['metadata'] == metadata

        # pprint format: check expected line count and the version header line
        fpath = Path(tmp_path) / 'multi_index_with_metadata.txt'
        dataframe_to_file(fpath, df, fmt='pprint', include_version=True)
        lines = Path(fpath).read_text().split('\n')
        assert len(lines) == 38
        assert lines[0].strip() == pyhdx.VERSION_STRING
Esempio n. 2
0
 def to_file(
     self,
     file_path,
     include_version=True,
     include_metadata=True,
     fmt="csv",
     **kwargs,
 ):
     """Write only the fit output (``self.output``) to ``file_path``.

     When ``include_metadata`` is truthy, ``self.metadata`` is forwarded to
     ``dataframe_to_file``; a falsy value is forwarded unchanged. Remaining
     keyword arguments are passed through as-is.
     """
     if include_metadata:
         meta = self.metadata
     else:
         # Forward the falsy value (False/None/...) exactly as given.
         meta = include_metadata
     dataframe_to_file(
         file_path,
         self.output,
         include_version=include_version,
         include_metadata=meta,
         fmt=fmt,
         **kwargs,
     )
# Combine two measurements into a set and load previously stored rate guesses
hdx_set = HDXMeasurementSet([st1, st2])
guess = csv_to_protein(data_dir / 'output' / 'ecSecB_guess.csv')
# Convert exchange-rate guesses to initial deltaG values (from the first state)
gibbs_guess = hdx_set[0].guess_deltaG(guess['rate'])


# Example fit with only 5000 epochs and high learning rate
# Checkpoint stores model history every `epoch_step` epochs
checkpoint = CheckPoint(epoch_step=250)
result = fit_gibbs_global_batch(hdx_set, gibbs_guess, r1=0.5, r2=0.1, epochs=5000, lr=1e5, callbacks=[checkpoint])
print(f"MSE loss: {result.mse_loss:.2f}, "
      f"Reg loss: {result.reg_loss:.2f}, "
      f"Reg percent: {result.regularization_percentage:.0f}%")


# Persist the checkpointed model history as csv and pretty-printed text
df = checkpoint.to_dataframe(hdx_set.names)
dataframe_to_file(output_dir / 'model_history.csv', df)
dataframe_to_file(output_dir / 'model_history.txt', df, fmt='pprint')


# Checkpoint history scatter plot
# Note that these are raw dG values including interpolated values in regions of no coverage
history = checkpoint.model_history
num = len(history)
cmap = mpl.cm.get_cmap('winter')
# NOTE(review): `norm` is unused in the visible lines — presumably used for a
# colorbar further down in this (truncated) example; confirm before removing.
norm = mpl.colors.Normalize(vmin=1, vmax=num*checkpoint.epoch_step)
colors = iter(cmap(np.linspace(0, 1, num=num)))

fig, ax = plt.subplots()
for key, val in history.items():
    # NOTE(review): `n` is computed but unused in the visible lines
    n = len(val['dG'].numpy().squeeze())
    # plot element 0 along the first axis — presumably the first state of the
    # set, one color per checkpointed epoch; TODO confirm axis meaning
    ax.scatter(hdx_set.coverage.index, val['dG'].numpy().squeeze()[0], color=next(colors))
Esempio n. 4
0
                         remove_nan=False)
# Attach the control state to the peptide master table
pmt.set_control(control)
temperature, pH = 273.15 + 30, 8.

# Full measurement of the 'SecB WT apo' state
hdxm = HDXMeasurement(pmt.get_state('SecB WT apo'),
                      sequence=sequence,
                      temperature=temperature,
                      pH=pH)

# Reduced dataset: keep only peptides whose 'end' field is below 40
data = pmt.get_state('SecB WT apo')
reduced_data = data[data['end'] < 40]
hdxm_reduced = HDXMeasurement(reduced_data, temperature=temperature, pH=pH)

# Initial rate guesses from weighted-average fitting on the reduced dataset,
# written out as csv and pretty-printed text
result = fit_rates_weighted_average(hdxm_reduced)
reduced_guess = result.output
dataframe_to_file(output_dir / 'ecSecB_reduced_guess.csv', reduced_guess)
dataframe_to_file(output_dir / 'ecSecB_reduced_guess.txt',
                  reduced_guess,
                  fmt='pprint')

# Global Gibbs fit seeded from the rate guesses; save the full fit result
gibbs_guess = hdxm_reduced.guess_deltaG(reduced_guess['rate'])
fr_torch = fit_gibbs_global(hdxm_reduced, gibbs_guess, epochs=epochs, r1=2)
save_fitresult(output_dir / 'ecsecb_reduced', fr_torch)

if guess:
    wt_avg_result = fit_rates_weighted_average(hdxm,
                                               bounds=(1e-2 / 60., 800 / 60.))
    guess_output = wt_avg_result.output
    dataframe_to_file(output_dir / 'ecSecB_guess.csv', guess_output)
    dataframe_to_file(output_dir / 'ecSecB_guess.txt',
                      guess_output,
Esempio n. 5
0
"""Load HDX-MS data from yaml spec and perform initial guess of exchange rates"""
from pyhdx.batch_processing import yaml_to_hdxm
from pathlib import Path
from pyhdx.fitting import fit_rates_weighted_average
import yaml
from pyhdx.local_cluster import default_client
from pyhdx.fileIO import dataframe_to_file

# Paths: guesses are written next to this script; input data comes from the
# repository's tests folder
current_dir = Path(__file__).parent
output_dir = current_dir / 'guesses'
output_dir.mkdir(exist_ok=True)
data_dir = current_dir.parent / 'tests' / 'test_data' / 'input'
yaml_stream = Path(current_dir / 'yaml_files' / 'SecB.yaml').read_text()
data_dict = yaml.safe_load(yaml_stream)

# Requires local_cluster.py to be running (or other Dask client on default address in config)
client = default_client()

# One HDX measurement per top-level yaml entry; `dic` is already the value
# yielded by items() (a redundant `dic = data_dict[name]` was removed here).
for name, dic in data_dict.items():
    print(name)
    hdxm = yaml_to_hdxm(dic, data_dir=data_dir)

    # Save sequence info + intrinsic rates
    hdxm.coverage.protein.to_file(output_dir / f'{name}_sequence_info.txt',
                                  fmt='pprint')

    # Weighted-average rate fit, distributed over the Dask client
    fr = fit_rates_weighted_average(hdxm, client=client)
    dataframe_to_file(output_dir / f'{name}_rates_guess.csv', fr.output)