def test_read_write_tables(self, tmp_path):
    # Single-index columns
    df = pd.DataFrame(np.random.randn(25, 4), columns=list('ABCD'))
    df.index.name = 'singlecolumnindex'
    sio = StringIO()
    dataframe_to_stringio(df, sio)
    sio.seek(0)
    df_read = csv_to_dataframe(sio)
    pd.testing.assert_frame_equal(df, df_read)

    fpath = Path(tmp_path) / 'single_index.csv'
    dataframe_to_file(fpath, df)
    df_read = csv_to_dataframe(fpath)
    pd.testing.assert_frame_equal(df, df_read)

    # Multi-index columns
    cols = pd.MultiIndex.from_product([('a', 'b'), ('x', 'y')])
    df = pd.DataFrame(np.random.randn(25, 4), columns=cols)
    df.index.name = 'multicolumnindex'
    sio = StringIO()
    dataframe_to_stringio(df, sio)
    sio.seek(0)
    df_read = csv_to_dataframe(sio)
    pd.testing.assert_frame_equal(df, df_read)

    fpath = Path(tmp_path) / 'multi_index.csv'
    dataframe_to_file(fpath, df)
    df_read = csv_to_dataframe(fpath)
    pd.testing.assert_frame_equal(df, df_read)

    protein = csv_to_protein(fpath)
    assert protein.index.name == 'r_number'
    assert isinstance(protein, Protein)

    metadata = {
        'instrument': 'LCMS',
        'settings': {'pressure': '5 kPa', 'temperature': '400K'},
    }
    df.attrs['metadata'] = metadata

    fpath = Path(tmp_path) / 'multi_index_with_metadata.csv'
    dataframe_to_file(fpath, df)
    df_read = csv_to_dataframe(fpath)
    pd.testing.assert_frame_equal(df, df_read)
    assert df_read.attrs['metadata'] == metadata

    fpath = Path(tmp_path) / 'multi_index_with_metadata.txt'
    dataframe_to_file(fpath, df, fmt='pprint', include_version=True)
    lines = Path(fpath).read_text().split('\n')
    assert len(lines) == 38
    assert lines[0].strip() == pyhdx.VERSION_STRING
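# Usage sketch (not part of the test suite above): the same round trip in isolation.
# Metadata travels in `df.attrs['metadata']` and is restored by csv_to_dataframe;
# the file name below is arbitrary.
from pathlib import Path

import numpy as np
import pandas as pd

from pyhdx.fileIO import csv_to_dataframe, dataframe_to_file

df = pd.DataFrame(np.random.randn(10, 2), columns=list('AB'))
df.attrs['metadata'] = {'instrument': 'LCMS', 'settings': {'pressure': '5 kPa'}}

fpath = Path('example.csv')
dataframe_to_file(fpath, df)
df_read = csv_to_dataframe(fpath)
assert df_read.attrs['metadata'] == df.attrs['metadata']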
def to_file(
    self,
    file_path,
    include_version=True,
    include_metadata=True,
    fmt="csv",
    **kwargs,
):
    """Save only the output table of this fit result to file."""
    # Use this object's own metadata unless a custom dict (or False) is given
    metadata = self.metadata if include_metadata is True else include_metadata
    dataframe_to_file(
        file_path,
        self.output,
        include_version=include_version,
        include_metadata=metadata,
        fmt=fmt,
        **kwargs,
    )
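# Usage sketch, assuming a fit result object `fr_torch` exposing this method
# (e.g. as returned by fit_gibbs_global in the scripts further below); file names
# are illustrative only.
fr_torch.to_file('gibbs_fit.csv')                # csv with version and metadata header
fr_torch.to_file('gibbs_fit.txt', fmt='pprint')  # human-readable table
fr_torch.to_file('gibbs_fit_bare.csv', include_version=False, include_metadata=False)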
hdx_set = HDXMeasurementSet([st1, st2])
guess = csv_to_protein(data_dir / 'output' / 'ecSecB_guess.csv')
gibbs_guess = hdx_set[0].guess_deltaG(guess['rate'])

# Example fit with only 5000 epochs and a high learning rate
# Checkpoint stores model history every `epoch_step` epochs
checkpoint = CheckPoint(epoch_step=250)
result = fit_gibbs_global_batch(hdx_set, gibbs_guess, r1=0.5, r2=0.1, epochs=5000, lr=1e5,
                                callbacks=[checkpoint])
print(f"MSE loss: {result.mse_loss:.2f}, "
      f"Reg loss: {result.reg_loss:.2f}, "
      f"Reg percent: {result.regularization_percentage:.0f}%")

df = checkpoint.to_dataframe(hdx_set.names)
dataframe_to_file(output_dir / 'model_history.csv', df)
dataframe_to_file(output_dir / 'model_history.txt', df, fmt='pprint')

# Checkpoint history scatter plot
# Note that these are raw dG values, including interpolated values in regions without coverage
history = checkpoint.model_history
num = len(history)
cmap = mpl.cm.get_cmap('winter')
norm = mpl.colors.Normalize(vmin=1, vmax=num * checkpoint.epoch_step)
colors = iter(cmap(np.linspace(0, 1, num=num)))

fig, ax = plt.subplots()
for key, val in history.items():
    dG = val['dG'].numpy().squeeze()
    ax.scatter(hdx_set.coverage.index, dG[0], color=next(colors))
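# Follow-up sketch: the saved history table can be read back with csv_to_dataframe
# (the same round trip exercised in the fileIO tests above); inspecting the columns
# is illustrative only.
from pyhdx.fileIO import csv_to_dataframe

history_df = csv_to_dataframe(output_dir / 'model_history.csv')
print(history_df.columns)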
    remove_nan=False)
pmt.set_control(control)

temperature, pH = 273.15 + 30, 8.0
hdxm = HDXMeasurement(pmt.get_state('SecB WT apo'), sequence=sequence,
                      temperature=temperature, pH=pH)

data = pmt.get_state('SecB WT apo')
reduced_data = data[data['end'] < 40]
hdxm_reduced = HDXMeasurement(reduced_data, temperature=temperature, pH=pH)

result = fit_rates_weighted_average(hdxm_reduced)
reduced_guess = result.output
dataframe_to_file(output_dir / 'ecSecB_reduced_guess.csv', reduced_guess)
dataframe_to_file(output_dir / 'ecSecB_reduced_guess.txt', reduced_guess, fmt='pprint')

gibbs_guess = hdxm_reduced.guess_deltaG(reduced_guess['rate'])
fr_torch = fit_gibbs_global(hdxm_reduced, gibbs_guess, epochs=epochs, r1=2)
save_fitresult(output_dir / 'ecsecb_reduced', fr_torch)

if guess:
    wt_avg_result = fit_rates_weighted_average(hdxm, bounds=(1e-2 / 60., 800 / 60.))
    guess_output = wt_avg_result.output
    dataframe_to_file(output_dir / 'ecSecB_guess.csv', guess_output)
    dataframe_to_file(output_dir / 'ecSecB_guess.txt', guess_output, fmt='pprint')
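# Follow-up sketch: reloading the result saved with save_fitresult above. This assumes
# pyhdx.fileIO also provides `load_fitresult` as its counterpart; if it does not, the
# csv outputs written above can be read back with csv_to_dataframe instead.
from pyhdx.fileIO import load_fitresult

fit_result = load_fitresult(output_dir / 'ecsecb_reduced')
print(fit_result.output)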
"""Load HDX-MS data from yaml spec and perform initial guess of exchange rates""" from pyhdx.batch_processing import yaml_to_hdxm from pathlib import Path from pyhdx.fitting import fit_rates_weighted_average import yaml from pyhdx.local_cluster import default_client from pyhdx.fileIO import dataframe_to_file current_dir = Path(__file__).parent output_dir = current_dir / 'guesses' output_dir.mkdir(exist_ok=True) data_dir = current_dir.parent / 'tests' / 'test_data' / 'input' yaml_stream = Path(current_dir / 'yaml_files' / 'SecB.yaml').read_text() data_dict = yaml.safe_load(yaml_stream) # Requires local_cluster.py to be running (or other Dask client on default address in config) client = default_client() for name, dic in data_dict.items(): print(name) dic = data_dict[name] hdxm = yaml_to_hdxm(dic, data_dir=data_dir) # Save sequence info + intrinsic rates hdxm.coverage.protein.to_file(output_dir / f'{name}_sequence_info.txt', fmt='pprint') fr = fit_rates_weighted_average(hdxm, client=client) dataframe_to_file(output_dir / f'{name}_rates_guess.csv', fr.output)