Beispiel #1
0
    def test_global_fit(self):
        """Run a 1000-epoch global fit on ``self.series_apo`` and compare to stored output.

        The initial guess is derived from the rates in ``ecSecB_guess.txt``. The
        guess plus fit must finish within 5 seconds (a very crude benchmark), and
        the 'deltaG', 'covariance' and 'k_obs' columns of the fit output must
        match ``ecSecB_torch_fit.txt`` within a relative tolerance of 1%.
        """
        test_data = os.path.join(directory, 'test_data')
        guess_table = csv_to_protein(os.path.join(test_data, 'ecSecB_guess.txt'))

        # Very crude benchmark: the timed section covers the guess and the fit.
        started = time.time()
        guess = self.series_apo.guess_deltaG(guess_table['rate']).to_numpy()
        result = fit_gibbs_global(self.series_apo, guess, epochs=1000, r1=2)
        finished = time.time()

        assert finished - started < 5
        fitted = result.output
        reference = csv_to_protein(os.path.join(test_data, 'ecSecB_torch_fit.txt'))

        # NaN entries at matching positions are treated as equal.
        for column in ('deltaG', 'covariance', 'k_obs'):
            assert np.allclose(reference[column],
                               fitted[column],
                               equal_nan=True,
                               rtol=0.01)
Beispiel #2
0
    def test_dtype_cuda(self):
        """Check ``fit_gibbs_global`` under the 'cuda' device and 'float32' dtype settings.

        With the device set to 'cuda':

        * if ``torch.cuda.is_available()``, the fit output fields 'dG', 'k_obs'
          and 'covariance' must match ``ecSecB_torch_fit.csv`` within 1%;
        * otherwise the fit is expected to raise an ``AssertionError`` whose
          message matches ``".* CUDA .*"``.

        Afterwards the device is set back to 'cpu' and the dtype to 'float32';
        the fitted ``model.dG`` must then be ``torch.float32`` and the 'dG' and
        'k_obs' fields must still match the reference within 1%.

        The ``cfg`` entries changed here are always reset to 'cpu' / 'float64'
        when the test ends, whether it passes or fails.
        """
        check_deltaG = csv_to_protein(output_dir / 'ecSecB_torch_fit.csv')
        initial_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')

        # ``cfg`` is modified in place; the ``finally`` clause guarantees the
        # settings are put back even when an assertion or the fit itself fails,
        # so 'cuda' / 'float32' cannot carry over into whatever runs next.
        try:
            cfg.set('fitting', 'device', 'cuda')
            gibbs_guess = self.hdxm_apo.guess_deltaG(
                initial_rates['rate']).to_numpy()

            if torch.cuda.is_available():
                fr_global = fit_gibbs_global(self.hdxm_apo,
                                             gibbs_guess,
                                             epochs=1000,
                                             r1=2)
                out_deltaG = fr_global.output
                for field in ['dG', 'k_obs', 'covariance']:
                    assert_series_equal(check_deltaG[field],
                                        out_deltaG[self.hdxm_apo.name, field],
                                        rtol=0.01,
                                        check_dtype=False,
                                        check_names=False)
            else:
                # Only the raised error matters here; the result is not kept.
                with pytest.raises(AssertionError, match=r".* CUDA .*"):
                    fit_gibbs_global(self.hdxm_apo,
                                     gibbs_guess,
                                     epochs=1000,
                                     r1=2)

            cfg.set('fitting', 'device', 'cpu')
            cfg.set('fitting', 'dtype', 'float32')

            fr_global = fit_gibbs_global(self.hdxm_apo,
                                         gibbs_guess,
                                         epochs=1000,
                                         r1=2)
            dg = fr_global.model.dG
            assert dg.dtype == torch.float32

            out_deltaG = fr_global.output
            # 'covariance' is not compared for the float32 run.
            for field in ['dG', 'k_obs']:
                assert_series_equal(check_deltaG[field],
                                    out_deltaG[self.hdxm_apo.name, field],
                                    rtol=0.01,
                                    check_dtype=False,
                                    check_names=False)
        finally:
            cfg.set('fitting', 'device', 'cpu')
            cfg.set('fitting', 'dtype', 'float64')
Beispiel #3
0
    def setup_class(cls):
        """Prepare the measurement and a short fit result shared through ``cls``.

        Sets ``cls.fpath``, ``cls.temperature``, ``cls.pH``, ``cls.hdxm`` (the
        'SecB WT apo' state read from ``ecSecB_apo.csv``) and ``cls.fit_result``
        (a 100-epoch ``fit_gibbs_global`` run started from the rates stored in
        ``ecSecB_guess.csv``).
        """
        cls.fpath = input_dir / 'ecSecB_apo.csv'
        raw_peptides = read_dynamx(cls.fpath)

        cls.temperature = 273.15 + 30
        cls.pH = 8.

        master_table = PeptideMasterTable(
            raw_peptides,
            drop_first=1,
            ignore_prolines=True,
            remove_nan=False,
        )
        master_table.set_control(('Full deuteration control', 0.167*60))

        apo_state = master_table.get_state('SecB WT apo')
        cls.hdxm = HDXMeasurement(apo_state,
                                  temperature=cls.temperature,
                                  pH=cls.pH)

        guessed_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')['rate']
        start_values = cls.hdxm.guess_deltaG(guessed_rates)
        cls.fit_result = fit_gibbs_global(cls.hdxm,
                                          start_values,
                                          epochs=100,
                                          r1=2)
Beispiel #4
0
    def test_global_fit_extended(self):
        """Run a 20000-epoch global fit on ``self.hdxm_apo`` and compare to stored output.

        The fit must finish within 50 seconds (a very crude benchmark). The 'dG',
        'k_obs' and 'covariance' series of the output must match
        ``ecSecB_torch_fit_epochs_20000.csv`` within a relative tolerance of 1%,
        and the squared-errors array must have shape ``(1, Np, Nt)``.
        """
        reference = csv_to_protein(
            output_dir / 'ecSecB_torch_fit_epochs_20000.csv')
        guessed_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')['rate']
        start_values = self.hdxm_apo.guess_deltaG(guessed_rates)

        # Very crude benchmark: only the fit itself is timed.
        tic = time.time()
        result = fit_gibbs_global(self.hdxm_apo,
                                  start_values,
                                  epochs=20000,
                                  r1=2)
        toc = time.time()
        assert toc - tic < 50

        fitted = result.output
        for field in ('dG', 'k_obs', 'covariance'):
            column = (self.hdxm_apo.name, field)
            assert_series_equal(reference[column],
                                fitted[column],
                                rtol=0.01,
                                check_dtype=False)

        squared_errors = result.get_squared_errors()
        expected_shape = (1, self.hdxm_apo.Np, self.hdxm_apo.Nt)
        assert squared_errors.shape == expected_shape
Beispiel #5
0
    def test_global_fit_extended_cuda(self):
        """Run the 20000-epoch global fit with device 'cuda' and dtype 'float32'.

        The 'dG' and 'k_obs' series of the fit output must match
        ``ecSecB_torch_fit_epochs_20000.csv`` within a relative tolerance of 1%.

        The ``cfg`` entries changed here are always reset to 'cpu' / 'float64'
        when the test ends, whether it passes or fails.
        """
        check_deltaG = csv_to_protein(output_dir /
                                      'ecSecB_torch_fit_epochs_20000.csv')
        initial_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')
        gibbs_guess = self.hdxm_apo.guess_deltaG(initial_rates['rate'])

        # todo allow contextmanger?
        # Until then, ``try/finally`` guarantees the settings are put back even
        # when the fit raises or a comparison fails, so 'cuda' / 'float32'
        # cannot carry over into whatever runs next.
        try:
            cfg.set('fitting', 'device', 'cuda')
            cfg.set('fitting', 'dtype', 'float32')

            fr_global = fit_gibbs_global(self.hdxm_apo,
                                         gibbs_guess,
                                         epochs=20000,
                                         r1=2)
            out_deltaG = fr_global.output

            for field in ['dG', 'k_obs']:
                assert_series_equal(check_deltaG[self.hdxm_apo.name, field],
                                    out_deltaG[self.hdxm_apo.name, field],
                                    rtol=0.01,
                                    check_dtype=False)
        finally:
            cfg.set('fitting', 'device', 'cpu')
            cfg.set('fitting', 'dtype', 'float64')
# Script fragment: regenerates reference files under `directory / 'test_data'`.
# `data`, `control`, `sequence`, `sequence_dimer`, `guess`, `epochs` and
# `directory` are defined outside this excerpt.
pmt = PeptideMasterTable(data, drop_first=1, ignore_prolines=True, remove_nan=False)
pmt.set_control(control)
# NOTE(review): presumably 30 degrees C expressed in Kelvin - confirm.
temperature, pH = 273.15 + 30, 8.

hdxm = HDXMeasurement(pmt.get_state('SecB WT apo'), sequence=sequence, temperature=temperature, pH=pH)

# Either recompute the initial rates and overwrite ecSecB_guess.txt, or reuse
# the rates stored in that file.
if guess:
    # NOTE(review): `client` is not passed on in the visible code; presumably
    # default_client() is called for its side effect - confirm.
    client = default_client()
    wt_avg_result = fit_rates_weighted_average(hdxm, bounds=(1e-2, 800))
    output = wt_avg_result.output
    output.to_file(directory / 'test_data' / 'ecSecB_guess.txt')
else:
    output = csv_to_protein(directory / 'test_data' / 'ecSecB_guess.txt')

# Single-measurement fit, started from a guess derived from the 'rate' column;
# the fit output is written to ecSecB_torch_fit.txt.
gibbs_guess = hdxm.guess_deltaG(output['rate'])
fr_torch = fit_gibbs_global(hdxm, gibbs_guess, epochs=epochs, r1=2)
fr_torch.output.to_file(directory / 'test_data' / 'ecSecB_torch_fit.txt')

# Second measurement, built from the 'SecB his dimer apo' state of the same table.
hdxm_dimer = HDXMeasurement(pmt.get_state('SecB his dimer apo'), sequence=sequence_dimer,
                            temperature=temperature, pH=pH)

# The dimer measurement comes first in the set.
hdx_set = HDXMeasurementSet([hdxm_dimer, hdxm])

# The same rates (from the WT apo guess) are supplied for both measurements.
gibbs_guess = hdx_set.guess_deltaG([output['rate'], output['rate']])
batch_result = fit_gibbs_global_batch(hdx_set, gibbs_guess, epochs=epochs)

# The batch output is written twice: once with the default format and once
# with fmt='pprint'.
batch_result.output.to_file(directory / 'test_data' / 'ecSecB_batch.csv')
batch_result.output.to_file(directory / 'test_data' / 'ecSecB_batch.txt', fmt='pprint')

# Order is inverted compared to test!
mock_alignment = {
                      temperature=temperature,
                      pH=pH)

# Script fragment: builds a reduced data set and writes reference files to
# `output_dir`. `pmt`, `hdxm`, `temperature`, `pH`, `guess`, `epochs` and
# `output_dir` are defined outside this excerpt.

# Keep only the rows of the 'SecB WT apo' state whose 'end' value is below 40.
data = pmt.get_state('SecB WT apo')
reduced_data = data[data['end'] < 40]
hdxm_reduced = HDXMeasurement(reduced_data, temperature=temperature, pH=pH)

# Rate guess for the reduced measurement, written as .csv (default format)
# and as .txt (fmt='pprint').
result = fit_rates_weighted_average(hdxm_reduced)
reduced_guess = result.output
dataframe_to_file(output_dir / 'ecSecB_reduced_guess.csv', reduced_guess)
dataframe_to_file(output_dir / 'ecSecB_reduced_guess.txt',
                  reduced_guess,
                  fmt='pprint')

# Fit the reduced measurement starting from that guess and save the fit result.
gibbs_guess = hdxm_reduced.guess_deltaG(reduced_guess['rate'])
fr_torch = fit_gibbs_global(hdxm_reduced, gibbs_guess, epochs=epochs, r1=2)
save_fitresult(output_dir / 'ecsecb_reduced', fr_torch)

# Either recompute the rate guess for the full measurement `hdxm` and write it
# out, or load the stored guess from ecSecB_guess.csv.
if guess:
    # NOTE(review): both bounds are divided by 60; presumably a unit
    # conversion of the rate bounds - confirm.
    wt_avg_result = fit_rates_weighted_average(hdxm,
                                               bounds=(1e-2 / 60., 800 / 60.))
    guess_output = wt_avg_result.output
    dataframe_to_file(output_dir / 'ecSecB_guess.csv', guess_output)
    dataframe_to_file(output_dir / 'ecSecB_guess.txt',
                      guess_output,
                      fmt='pprint')
else:
    guess_output = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')

# Export protein sequence and intrinsic rate of exchange
hdxm.coverage.protein.to_file(output_dir / 'ecSecB_info.csv')
Beispiel #8
0
# Script fragment: fits the 'SecB WT apo' state and writes the result to
# `output_dir`. `guess`, `fit_kwargs`, `input_dir`, `output_dir` and
# `test_data_dir` are defined outside this excerpt.

# Load the data of two Dynamx files, and combine the result to one table
data = read_dynamx(input_dir / 'ecSecB_apo.csv', input_dir / 'ecSecB_dimer.csv')

pmt = PeptideMasterTable(data, drop_first=1, ignore_prolines=True, remove_nan=False)
pmt.set_control(('Full deuteration control', 0.167*60))
# NOTE(review): presumably 30 degrees C expressed in Kelvin - confirm.
temperature, pH = 273.15 + 30, 8.
hdxm = HDXMeasurement(pmt.get_state('SecB WT apo'), temperature=temperature, pH=pH)

#%%

# Either compute the initial rates with the weighted-average fit, or load
# previously stored rates.
if guess:
    client = default_client()
    wt_avg_result = fit_rates_weighted_average(hdxm, client=client)
    init_guess = wt_avg_result.output
else:
    # NOTE(review): this path is built from `test_data_dir` rather than
    # `output_dir`; confirm both refer to the same folder.
    init_guess = csv_to_dataframe(test_data_dir / 'output' / 'ecSecB_guess.csv')

gibbs_guess = hdxm.guess_deltaG(init_guess['rate'])

#%%

# The fit settings come entirely from `fit_kwargs`.
fr_torch = fit_gibbs_global(hdxm, gibbs_guess, **fit_kwargs)

# Human readable output
fr_torch.to_file(output_dir / 'SecB_fit_result.txt', fmt='pprint')

# Machine readable output
fr_torch.to_file(output_dir / 'SecB_fit_result.csv', fmt='csv')

# Additionally store the fit result through save_fitresult.
save_fitresult(output_dir / 'SecB_fit', fr_torch)
Beispiel #9
0
# Script settings: when `guess` is False the stored initial rates are loaded
# instead of being recomputed; `epochs` is passed to the global fit.
guess = False
epochs = 1000

# Input locations, relative to the parent of the current working directory.
root_dir = Path().resolve().parent
test_data_dir = root_dir / 'tests' / 'test_data'
input_file_path = test_data_dir / 'ecSecB_apo.csv'

# Load the data of two Dynamx files, and combine the result to one table
data = read_dynamx(input_file_path, test_data_dir / 'ecSecB_dimer.csv')

pmt = PeptideMasterTable(data, drop_first=1, ignore_prolines=True, remove_nan=False)
pmt.set_control(('Full deuteration control', 0.167))

temperature, pH = 273.15 + 30, 8.
series = HDXMeasurement(
    pmt.get_state('SecB WT apo'),
    temperature=temperature,
    pH=pH,
)

# Initial rates: load the stored guess unless a fresh one was requested.
if not guess:
    init_guess = csv_to_protein(test_data_dir / 'ecSecB_guess.txt')
else:
    client = default_client()
    wt_avg_result = fit_rates_weighted_average(series, client=client)
    init_guess = wt_avg_result.output

# Global fit started from the guess derived from the 'rate' column; the
# 'total_loss' metadata entry of the result is printed.
gibbs_guess = series.guess_deltaG(init_guess['rate'])
fr_torch = fit_gibbs_global(series, gibbs_guess, epochs=epochs)
print(fr_torch.metadata['total_loss'])