Beispiel #1
0
    def test_global_fit(self):
        """Run a 1000-epoch global fit on the apo series and compare with stored results.

        Doubles as a very crude benchmark: the fit itself has to finish in
        less than 5 seconds. Fitted 'deltaG' and 'covariance' must agree with
        the reference file within a relative tolerance of 1% (NaNs compare equal).
        """
        test_data = os.path.join(directory, 'test_data')
        fitter = KineticsFitting(
            self.series_apo,
            bounds=(1e-2, 800),
            temperature=self.temperature,
            pH=self.pH,
        )
        rates_guess = csv_to_protein(os.path.join(test_data, 'ecSecB_guess.txt'))

        # Time only the fit call
        started = time.time()
        fit_result = fitter.global_fit(rates_guess, epochs=1000)
        elapsed = time.time() - started

        assert elapsed < 5

        fitted = fit_result.output
        reference = csv_to_protein(os.path.join(test_data, 'ecSecB_torch_fit.txt'))

        for column in ('deltaG', 'covariance'):
            assert np.allclose(reference[column],
                               fitted[column],
                               equal_nan=True,
                               rtol=0.01)
Beispiel #2
0
def reload_dashboard():
    """Reset the module-level ``ctrl`` with test data, tables and fit settings.

    Loads the data objects described by ``yaml_dicts``, registers the rates,
    peptides and colors tables on the 'dataframe' source, presets the
    'FitControl' panel and hands a PDB file's text to the protein view.
    """
    # Build every data object first, then label each one with its key
    loaded = {
        name: load_from_yaml(spec, data_dir=data_dir)
        for name, spec in yaml_dicts.items()
    }
    for name, data_obj in loaded.items():
        data_obj.metadata['name'] = name
    ctrl.data_objects = loaded

    rates_df = csv_to_protein(test_dir / 'rates.txt', column_depth=3).df

    # Still read from disk, but not registered: its add_df call is disabled
    fit_df = csv_to_protein(test_dir / 'global_fit.txt', column_depth=3).df
    colors_df = csv_to_protein(test_dir / 'colors.txt', column_depth=3).df

    peptides_df = csv_to_dataframe(test_dir / 'peptides.txt', column_depth=2, index_col=0)

    df_source = ctrl.sources['dataframe']
    registrations = (
        (rates_df, 'rates'),
        (peptides_df, 'peptides'),
        (colors_df, 'colors'),
    )
    for table, label in registrations:
        df_source.add_df(table, label)

    ctrl.sources['dataframe'].updated = True

    panel = ctrl.control_panels['FitControl']
    panel.epochs = 100
    panel.fit_mode = 'Single'
    panel.fit_name = 'new_global_fit_test_123'

    protein_view = ctrl.views['protein']
    pdb_text = Path(test_dir / '1qyn.pdb').read_text()
    protein_view.ngl_view.pdb_string = pdb_text
Beispiel #3
0
    def test_batch_fit(self, tmp_path):
        """Batch-fit two measurements, plain and aligned, and compare 'dG' to stored output.

        Also checks that the fit result survives a round trip through a csv
        file (metadata preserved) and that the squared errors array has the
        shape ``(Ns, Np, Nt)`` of the measurement set.
        """
        from pandas.testing import assert_series_equal

        states = ['SecB WT apo', 'SecB his dimer apo']

        def compare_dG(fitted, reference):
            # Per state, fitted dG must match the reference within 10 %
            for state in states:
                assert_series_equal(fitted[state]['dG'],
                                    reference[state]['dG'],
                                    rtol=0.1)

        hdx_set = HDXMeasurementSet([self.hdxm_apo, self.hdxm_dimer])
        guess = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')

        # Same guessed rates for every measurement, one column per name
        rates_df = pd.DataFrame({name: guess['rate'] for name in hdx_set.names})

        batch_guess = hdx_set.guess_deltaG(rates_df)
        batch_result = fit_gibbs_global_batch(hdx_set, batch_guess, epochs=1000)

        # File round trip keeps the metadata
        saved_path = Path(tmp_path) / 'fit_result_batch.csv'
        batch_result.to_file(saved_path)
        reloaded = csv_to_dataframe(saved_path)
        assert reloaded.attrs['metadata'] == batch_result.metadata

        compare_dG(batch_result.output,
                   csv_to_protein(output_dir / 'ecSecB_batch.csv'))

        sq_errors = batch_result.get_squared_errors()
        assert sq_errors.shape == (hdx_set.Ns, hdx_set.Np, hdx_set.Nt)

        # Gapped sequences standing in for a real alignment of the two states
        mock_alignment = {
            'apo':
            'MSEQNNTEMTFQIQRIYTKDI------------SFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLG-------------------EETAFLCEVQQGGIFSIAGIEGTQMAHCLGAYCPNILFPYARECITSMVSRG----TFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA',
            'dimer':
            'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVY--------------EVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQMAHCLGA----YCPNILFPAARECIASMVARGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA-----------------',
        }
        hdx_set.add_alignment(list(mock_alignment.values()))

        # Guesses from first measurement
        aligned_guess = hdx_set[0].guess_deltaG(guess['rate'])
        aligned_result = fit_gibbs_global_batch_aligned(hdx_set,
                                                        aligned_guess,
                                                        r1=2,
                                                        r2=5,
                                                        epochs=1000)

        compare_dG(aligned_result.output,
                   csv_to_protein(output_dir / 'ecSecB_batch_aligned.csv'))
Beispiel #4
0
    def test_batch_fit(self):
        """Batch-fit the apo and dimer series, plain and aligned, against stored 'deltaG'."""
        from pandas.testing import assert_series_equal

        states = ['SecB WT apo', 'SecB his dimer apo']

        def data_file(filename):
            # Path of a file inside the test_data directory
            return os.path.join(directory, 'test_data', filename)

        def compare_deltaG(fitted, reference):
            # Per state, fitted deltaG must match the reference within 10 %
            for state in states:
                assert_series_equal(fitted[state]['deltaG'],
                                    reference[state]['deltaG'],
                                    rtol=0.1)

        hdx_set = HDXMeasurementSet([self.series_apo, self.series_dimer])
        guess = csv_to_protein(data_file('ecSecB_guess.txt'))

        # The same rate guesses are used for both measurements
        plain_guess = hdx_set.guess_deltaG([guess['rate'], guess['rate']])
        plain_result = fit_gibbs_global_batch(hdx_set, plain_guess, epochs=1000)

        compare_deltaG(plain_result.output,
                       csv_to_protein(data_file('ecSecB_batch.csv'), column_depth=2))

        # Gapped sequences standing in for a real alignment of the two states
        mock_alignment = {
            'apo':
            'MSEQNNTEMTFQIQRIYTKDI------------SFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLG-------------------EETAFLCEVQQGGIFSIAGIEGTQMAHCLGAYCPNILFPYARECITSMVSRG----TFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA',
            'dimer':
            'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVY--------------EVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQMAHCLGA----YCPNILFPAARECIASMVARGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA-----------------',
        }
        hdx_set.add_alignment(list(mock_alignment.values()))

        # Guess again after the alignment has been added
        aligned_guess = hdx_set.guess_deltaG([guess['rate'], guess['rate']])
        aligned_result = fit_gibbs_global_batch_aligned(hdx_set,
                                                        aligned_guess,
                                                        r1=2,
                                                        r2=5,
                                                        epochs=1000)

        compare_deltaG(aligned_result.output,
                       csv_to_protein(data_file('ecSecB_batch_aligned.csv'), column_depth=2))
Beispiel #5
0
    def setup_class(cls):
        """Prepare structured arrays, a Protein with metadata and an HDX measurement.

        Three structured arrays keyed on 'r_number' with differing fields and
        residue ranges are stored as ``array1``..``array3``; ``protein`` is read
        from 'ecSecB_info.csv' and ``series`` is built from the 'SecB WT apo' state.
        """
        # Residues 3..17, constant 'apple'
        first = np.empty(15, dtype=[('r_number', int), ('apple', float)])
        first['r_number'] = 3 + np.arange(15)
        first['apple'] = np.full(15, 12.0)
        cls.array1 = first

        # Residues 6..22, constant 'apple' plus noisy 'grapes'
        second = np.empty(
            17, dtype=[('r_number', int), ('apple', float), ('grapes', float)])
        second['r_number'] = 6 + np.arange(17)
        second['apple'] = np.full(17, 10.0)
        second['grapes'] = 15.0 + np.random.rand(17)
        cls.array2 = second

        # Residues 1..10, fields that do not occur in the other two arrays
        third = np.empty(
            10, dtype=[('r_number', int), ('pear', float), ('banana', float)])
        third['r_number'] = 1 + np.arange(10)
        third['pear'] = np.random.rand(10) + 20
        third['banana'] = -(np.random.rand(10) + 20)
        cls.array3 = third

        protein_metadata = {
            'temperature': 273.15,
            'pH': 7.5,
            'mutations': ['V123S', 'P234S'],
        }
        cls.protein = csv_to_protein(output_dir / 'ecSecB_info.csv')
        cls.protein.metadata = protein_metadata

        peptide_table = PeptideMasterTable(read_dynamx(input_dir / 'ecSecB_apo.csv'))
        cls.series = HDXMeasurement(peptide_table.get_state('SecB WT apo'), c_term=200)
Beispiel #6
0
    def setup_class(cls):
        """Build three structured test arrays, a Protein and an HDX measurement.

        The arrays share the 'r_number' field but cover different residue
        ranges and value fields. ``protein`` is read from 'ecSecB_info.csv';
        ``series`` is the 'SecB WT apo' state of 'ecSecB_apo.csv'.
        """
        test_data = directory / 'test_data'

        # 15 residues starting at 3
        fields = [('r_number', int), ('apple', float)]
        arr = np.empty(15, dtype=fields)
        arr['r_number'] = np.arange(3, 18)
        arr['apple'] = 12 * np.ones(15)
        cls.array1 = arr

        # 17 residues starting at 6, with an extra randomised field
        fields = [('r_number', int), ('apple', float), ('grapes', float)]
        arr = np.empty(17, dtype=fields)
        arr['r_number'] = np.arange(6, 23)
        arr['apple'] = 10 * np.ones(17)
        arr['grapes'] = np.ones(17) * 15 + np.random.rand(17)
        cls.array2 = arr

        # 10 residues starting at 1, with fields unique to this array
        fields = [('r_number', int), ('pear', float), ('banana', float)]
        arr = np.empty(10, dtype=fields)
        arr['r_number'] = np.arange(1, 11)
        arr['pear'] = np.random.rand(10) + 20
        arr['banana'] = -(np.random.rand(10) + 20)
        cls.array3 = arr

        cls.protein = csv_to_protein(test_data / 'ecSecB_info.csv', column_depth=1)

        peptide_table = PeptideMasterTable(read_dynamx(test_data / 'ecSecB_apo.csv'))
        #states = pf1.groupby_state(c_term=200)
        apo_state = peptide_table.get_state('SecB WT apo')
        cls.series = HDXMeasurement(apo_state, c_term=200)
Beispiel #7
0
    def test_initial_guess_wt_average(self):
        """Weighted-average rate guesses must have size 100 and equal the stored rates."""
        guessed = fit_rates_weighted_average(self.reduced_hdxm).output

        assert guessed.size == 100
        reference = csv_to_protein(output_dir / 'ecSecB_reduced_guess.csv')
        pd.testing.assert_series_equal(reference['rate'], guessed['rate'])
Beispiel #8
0
 def test_to_file(self):
     """Write the protein to a temporary csv file and verify data and metadata round-trip."""
     original = self.protein
     with tempfile.TemporaryDirectory() as scratch_dir:
         target = Path(scratch_dir) / 'protein.csv'
         original.to_file(target)
         reloaded = csv_to_protein(target)
         pd.testing.assert_frame_equal(self.protein.df, reloaded.df)
         assert self.protein.metadata == reloaded.metadata
Beispiel #9
0
    def test_read_write_tables(self, tmp_path):
        """Round-trip dataframes through StringIO and files, with and without metadata.

        Covers single-index and multi-index columns, reading a written table
        back as a ``Protein``, preservation of ``df.attrs['metadata']`` and the
        pretty-printed text format with a version header.

        Parameters
        ----------
        tmp_path
            Temporary directory in which the test files are written.
        """
        # Single-index columns
        df = pd.DataFrame(np.random.randn(25, 4), columns=list('ABCD'))
        df.index.name = 'singlecolumnindex'

        sio = StringIO()
        dataframe_to_stringio(df, sio)
        sio.seek(0)
        df_read = csv_to_dataframe(sio)
        pd.testing.assert_frame_equal(df, df_read)

        fpath = Path(tmp_path) / 'single_index.csv'
        dataframe_to_file(fpath, df)
        # Rebind df_read: the comparison below must check the file round trip,
        # not the earlier StringIO result.
        df_read = csv_to_dataframe(fpath)
        pd.testing.assert_frame_equal(df, df_read)

        # multi-index column
        cols = pd.MultiIndex.from_product([('a', 'b'), ('x', 'y')])
        df = pd.DataFrame(np.random.randn(25, 4), columns=cols)
        df.index.name = 'multicolumnindex'

        sio = StringIO()
        dataframe_to_stringio(df, sio)
        sio.seek(0)
        df_read = csv_to_dataframe(sio)
        pd.testing.assert_frame_equal(df, df_read)

        fpath = Path(tmp_path) / 'multi_index.csv'
        dataframe_to_file(fpath, df)
        df_read = csv_to_dataframe(fpath)
        pd.testing.assert_frame_equal(df, df_read)

        # Reading the same file as a Protein yields an 'r_number' index name
        protein = csv_to_protein(fpath)
        assert protein.index.name == 'r_number'
        assert isinstance(protein, Protein)

        metadata = {
            'instrumuent': 'LCMS',
            'settings': {'pressure': '5 kPa', 'temperature': '400K'}
        }

        df.attrs['metadata'] = metadata

        # Metadata attached to the dataframe must survive the file round trip
        fpath = Path(tmp_path) / 'multi_index_with_metadata.csv'
        dataframe_to_file(fpath, df)
        df_read = csv_to_dataframe(fpath)
        pd.testing.assert_frame_equal(df, df_read)

        assert df_read.attrs['metadata'] == metadata

        # Pretty-printed output: fixed number of lines, version string first
        fpath = Path(tmp_path) / 'multi_index_with_metadata.txt'
        dataframe_to_file(fpath, df, fmt='pprint', include_version=True)
        lines = Path(fpath).read_text().split('\n')
        assert len(lines) == 38
        assert lines[0].strip() == pyhdx.VERSION_STRING
Beispiel #10
0
    def test_methods(self):
        """Check that the guess file can be read as a Protein and as a numpy array."""
        path = directory / 'test_data' / 'ecSecB_guess.txt'

        # testing csv_to_protein
        ret = csv_to_protein(path)
        # isinstance rather than an exact type comparison
        assert isinstance(ret, Protein)
        assert ret.index.name == 'r_number'

        # testing csv_to_np, fed from an in-memory copy of the file
        with open(path, mode='r') as f:
            ret = csv_to_np(StringIO(f.read()))
            assert 'r_number' in ret.dtype.names
Beispiel #11
0
    def test_batch_fit(self):
        """Batch-fit the apo and dimer series and compare 'deltaG' per state to stored output."""
        test_data = os.path.join(directory, 'test_data')

        # One fitting object per series, sharing temperature and pH
        series_pair = (self.series_apo, self.series_dimer)
        fitters = [
            KineticsFitting(series, temperature=self.temperature, pH=self.pH)
            for series in series_pair
        ]
        rates_guess = csv_to_protein(os.path.join(test_data, 'ecSecB_guess.txt'))

        # The same guess is supplied for both fitting objects
        batch = BatchFitting(fitters, [rates_guess, rates_guess])
        fitted = batch.global_fit(epochs=1000).output

        reference = csv_to_protein(os.path.join(test_data, 'ecSecB_batch.csv'))

        for state in ('SecB WT apo', 'SecB his dimer apo'):
            assert np.allclose(fitted[state]['deltaG'],
                               reference[state]['deltaG'],
                               equal_nan=True,
                               rtol=0.01)
Beispiel #12
0
def reload_dashboard():
    """Load the test tables from ``test_dir`` into the 'dataframe' source of ``ctrl``."""
    # Build all measurement objects first, then label each with its key
    data_objs = {
        name: yaml_to_hdxm(spec, data_dir=data_dir)
        for name, spec in yaml_dict.items()
    }
    for name, hdxm in data_objs.items():
        hdxm.metadata['name'] = name

    df_source = ctrl.sources['dataframe']
    table_names = ('peptides', 'peptides_mse', 'd_calc', 'rfu', 'rates',
                   'global_fit', 'losses')
    for table_name in table_names:
        table = csv_to_protein(test_dir / f'{table_name}.csv')
        df_source.add_df(table, table_name)

    #Temporary workaround for comment characters in csv files
    table_name = 'colors'
    colors = pd.read_csv(test_dir / f'{table_name}.csv',
                         header=[0, 1, 2],
                         index_col=0,
                         skiprows=3)
    df_source.add_df(colors, table_name)
Beispiel #13
0
    def test_dtype_cuda(self):
        """Fit with the 'cuda' device setting, then on CPU with float32.

        With CUDA available the fit output is compared with the stored
        reference; without it the fit is expected to raise an AssertionError
        mentioning CUDA. Afterwards the fit is repeated on CPU with dtype
        float32 and the model's dG tensor dtype is checked.

        The global ``cfg`` settings are restored in a ``finally`` clause so a
        failing assertion or fit cannot leave 'cuda' / 'float32' configured
        for the tests that run afterwards.
        """
        check_deltaG = csv_to_protein(output_dir / 'ecSecB_torch_fit.csv')
        initial_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')

        try:
            cfg.set('fitting', 'device', 'cuda')
            gibbs_guess = self.hdxm_apo.guess_deltaG(
                initial_rates['rate']).to_numpy()

            if torch.cuda.is_available():
                fr_global = fit_gibbs_global(self.hdxm_apo,
                                             gibbs_guess,
                                             epochs=1000,
                                             r1=2)
                out_deltaG = fr_global.output
                for field in ['dG', 'k_obs', 'covariance']:
                    assert_series_equal(check_deltaG[field],
                                        out_deltaG[self.hdxm_apo.name, field],
                                        rtol=0.01,
                                        check_dtype=False,
                                        check_names=False)
            else:
                # Without a CUDA device the fit itself is expected to fail
                with pytest.raises(AssertionError, match=r".* CUDA .*"):
                    fit_gibbs_global(self.hdxm_apo,
                                     gibbs_guess,
                                     epochs=1000,
                                     r1=2)

            cfg.set('fitting', 'device', 'cpu')
            cfg.set('fitting', 'dtype', 'float32')

            fr_global = fit_gibbs_global(self.hdxm_apo,
                                         gibbs_guess,
                                         epochs=1000,
                                         r1=2)
            dg = fr_global.model.dG
            assert dg.dtype == torch.float32

            out_deltaG = fr_global.output
            # 'covariance' is not compared for the float32 fit
            for field in ['dG', 'k_obs']:
                assert_series_equal(check_deltaG[field],
                                    out_deltaG[self.hdxm_apo.name, field],
                                    rtol=0.01,
                                    check_dtype=False,
                                    check_names=False)
        finally:
            # Same end state as a fully passing run: cpu / float64
            # (presumably the defaults; verify against the configuration)
            cfg.set('fitting', 'device', 'cpu')
            cfg.set('fitting', 'dtype', 'float64')
Beispiel #14
0
    def setup_class(cls):
        """Load the apo dataset, build the 'SecB WT apo' measurement and set the cluster port."""
        test_data = directory / 'test_data'

        cls.fpath = test_data / 'ecSecB_apo.csv'
        cls.pmt = PeptideMasterTable(read_dynamx(cls.fpath))

        cls.state = 'SecB WT apo'
        cls.control = ('Full deuteration control', 0.167)
        cls.pmt.set_control(cls.control)

        apo_peptides = cls.pmt.get_state(cls.state)
        cls.temperature = 273.15 + 30
        cls.pH = 8.
        cls.series = HDXMeasurement(apo_peptides,
                                    temperature=cls.temperature,
                                    pH=cls.pH)
        cls.prot_fit_result = csv_to_protein(test_data / 'ecSecB_torch_fit.txt')

        # Set the cluster port in the configuration to the test port
        settings = ConfigurationSettings()
        settings.set('cluster', 'port', str(test_port))
Beispiel #15
0
def do_fitting_from_yaml(yaml_dict, kf_obj):
    """Run a global fit described by a yaml dictionary (currently disabled).

    The function raises immediately. Everything after the ``raise`` is
    unreachable and only outlines the intended procedure: read the initial
    guess from file, then call ``kf_obj.global_fit`` with the 'global_fit'
    settings and their 'optimizer_kwargs'.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError('Fitting from yaml not implemented')

    # Work on a private copy so the pop() below cannot alter the caller's dict
    settings = copy.deepcopy(yaml_dict)
    guess_spec = settings['initial_guess']
    if 'file_path' not in guess_spec:
        raise NotImplementedError('only guesses by file currently')

    guess_path = guess_spec['file_path']
    try:  # todo update when txt files are no longer in existence
        initial_guess = txt_to_protein(guess_path)
    except KeyError:
        initial_guess = csv_to_protein(guess_path)

    fit_options = settings['global_fit']
    optimizer_options = fit_options.pop('optimizer_kwargs')
    return kf_obj.global_fit(initial_guess, **fit_options, **optimizer_options)
Beispiel #16
0
    def setup_class(cls):
        """Load the apo dataset, pick the 'SecB WT apo' series and wrap the stored fit in a DataSource."""
        test_data = directory / 'test_data'

        cls.fpath = test_data / 'ecSecB_apo.csv'
        cls.pmt = PeptideMasterTable(read_dynamx(cls.fpath))

        cls.state = 'SecB WT apo'
        cls.control = ('Full deuteration control', 0.167)
        cls.pmt.set_control(cls.control)

        # Group by state and keep only the series for the selected state
        cls.series = cls.pmt.groupby_state()[cls.state]

        cls.prot_fit_result = csv_to_protein(test_data / 'ecSecB_torch_fit.txt')

        source_options = dict(
            name='global_fit',
            x='r_number',
            tags=['mapping', 'pfact', 'deltaG'],
            renderer='circle',
            size=10,
        )
        cls.ds_fit = DataSource(cls.prot_fit_result, **source_options)
Beispiel #17
0
    def test_global_fit_extended(self):
        """20000-epoch global fit on the apo measurement, compared with stored output.

        Also a very crude benchmark (fit must take less than 50 seconds) and a
        shape check on the squared errors array.
        """
        reference = csv_to_protein(output_dir /
                                   'ecSecB_torch_fit_epochs_20000.csv')
        rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')
        gibbs_guess = self.hdxm_apo.guess_deltaG(rates['rate'])

        # Time only the fit call
        started = time.time()
        fit_result = fit_gibbs_global(self.hdxm_apo,
                                      gibbs_guess,
                                      epochs=20000,
                                      r1=2)
        elapsed = time.time() - started

        assert elapsed < 50
        fitted = fit_result.output
        for field in ('dG', 'k_obs', 'covariance'):
            column = (self.hdxm_apo.name, field)
            assert_series_equal(reference[column],
                                fitted[column],
                                rtol=0.01,
                                check_dtype=False)

        sq_errors = fit_result.get_squared_errors()
        assert sq_errors.shape == (1, self.hdxm_apo.Np, self.hdxm_apo.Nt)
Beispiel #18
0
    def test_global_fit_extended_cuda(self):
        """20000-epoch global fit with device 'cuda' and dtype float32.

        The fitted 'dG' and 'k_obs' are compared with the stored reference
        within 1 %. The global ``cfg`` settings are restored in a ``finally``
        clause so a failing fit or assertion cannot leave 'cuda' / 'float32'
        configured for the tests that run afterwards.
        """
        check_deltaG = csv_to_protein(output_dir /
                                      'ecSecB_torch_fit_epochs_20000.csv')
        initial_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')
        gibbs_guess = self.hdxm_apo.guess_deltaG(initial_rates['rate'])

        # todo allow contextmanger?
        try:
            cfg.set('fitting', 'device', 'cuda')
            cfg.set('fitting', 'dtype', 'float32')

            fr_global = fit_gibbs_global(self.hdxm_apo,
                                         gibbs_guess,
                                         epochs=20000,
                                         r1=2)
            out_deltaG = fr_global.output

            for field in ['dG', 'k_obs']:
                assert_series_equal(check_deltaG[self.hdxm_apo.name, field],
                                    out_deltaG[self.hdxm_apo.name, field],
                                    rtol=0.01,
                                    check_dtype=False)
        finally:
            # Same end state as a fully passing run: cpu / float64
            # (presumably the defaults; verify against the configuration)
            cfg.set('fitting', 'device', 'cpu')
            cfg.set('fitting', 'dtype', 'float64')
# Example script: batch-fit two HDX measurements while recording the model
# history with a CheckPoint callback, then write that history to file.

# All paths are resolved relative to the location of this script
current_dir = Path(__file__).parent
#current_dir = Path().cwd() / 'templates'  # pycharm scientific compat

# Results go to an 'output' directory next to the script (created if missing)
output_dir = current_dir / 'output'
output_dir.mkdir(exist_ok=True)
data_dir = current_dir.parent / 'tests' / 'test_data'
# Read both input files in a single call
data = read_dynamx(data_dir / 'input' / 'ecSecB_apo.csv', data_dir / 'input' / 'ecSecB_dimer.csv')

pmt = PeptideMasterTable(data)
# NOTE(review): the factor 60 looks like a minutes-to-seconds conversion of the
# control exposure - confirm against the units of the input files
pmt.set_control(('Full deuteration control', 0.167*60))

# One measurement per state, both at pH 8 and temperature 273.15 + 30
st1 = HDXMeasurement(pmt.get_state('SecB his dimer apo'), pH=8, temperature=273.15 + 30)
st2 = HDXMeasurement(pmt.get_state('SecB WT apo'), pH=8, temperature=273.15 + 30)

hdx_set = HDXMeasurementSet([st1, st2])
guess = csv_to_protein(data_dir / 'output' / 'ecSecB_guess.csv')
# The initial guess is derived from the first measurement in the set only
gibbs_guess = hdx_set[0].guess_deltaG(guess['rate'])


# Example fit with only 5000 epochs and high learning rate
# Checkpoint stores model history every `epoch_step` epochs
checkpoint = CheckPoint(epoch_step=250)
result = fit_gibbs_global_batch(hdx_set, gibbs_guess, r1=0.5, r2=0.1, epochs=5000, lr=1e5, callbacks=[checkpoint])
# Report the loss terms of the finished fit
print(f"MSE loss: {result.mse_loss:.2f}, "
      f"Reg loss: {result.reg_loss:.2f}, "
      f"Reg percent: {result.regularization_percentage:.0f}%")


# Export the recorded history as csv and as pretty-printed text
df = checkpoint.to_dataframe(hdx_set.names)
dataframe_to_file(output_dir / 'model_history.csv', df)
dataframe_to_file(output_dir / 'model_history.txt', df, fmt='pprint')
Beispiel #20
0
from pyhdx import VERSION_STRING

# Setup for a scan over regularization values: load the measurements described
# in a yaml file together with per-measurement rate guesses, and prepare the
# header lines of a fitting log.

# All paths are resolved relative to the location of this script
current_dir = Path(__file__).parent
data_dir = current_dir.parent / 'tests' / 'test_data'
# current_dir is already a Path, so no extra Path() wrapper is needed
yaml_stream = (current_dir / 'yaml_files' / 'SecB.yaml').read_text()
data_dict = yaml.safe_load(yaml_stream)

# Fit results go to a 'fit' directory next to the script (created if missing)
output_dir = current_dir / 'fit'
output_dir.mkdir(exist_ok=True)

# One measurement per yaml entry, named after its key
hdxm_list = [
    load_from_yaml(dic, data_dir=data_dir, name=name)
    for name, dic in data_dict.items()
]
# Matching rate guesses, in the same order as hdxm_list
rates_list = [
    csv_to_protein(current_dir / 'guesses' / f'{name}_rates_guess.txt')['rate']
    for name in data_dict
]
hdx_set = HDXMeasurementSet(hdxm_list)

gibbs_guess = hdx_set.guess_deltaG(rates_list)

# Plain string: the file name contains no placeholders
log_file = output_dir / 'fitting_log.txt'
now = datetime.now()
# Human-readable local time followed by the integer timestamp
date = f'# {now.strftime("%Y/%m/%d %H:%M:%S")} ({int(now.timestamp())})'

lines = [VERSION_STRING, date]

# r2 stays fixed while r1 is varied in the loop that follows
r2 = 0.5
for r1 in [0, 0.01, 0.25, 0.5, 1]:
    t0 = time.time()
Beispiel #21
0
    'MSEQNNTEMTFQIQRIYTKDI------------SFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLG-------------------EETAFLCEVQQGGIFSIAGIEGTQMAHCLGAYCPNILFPYARECITSMVSRG----TFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA',
}

# Example script: batch fit of two aligned HDX measurements.
# `mock_alignment` is defined above this section.

# The test data is located relative to this script
current_dir = Path(__file__).parent

data_dir = current_dir.parent / 'tests' / 'test_data'
# Read both input files in a single call
data = read_dynamx(data_dir / 'ecSecB_apo.csv', data_dir / 'ecSecB_dimer.csv')

pmt = PeptideMasterTable(data)
pmt.set_control(('Full deuteration control', 0.167))

# One measurement per state, both at pH 8 and temperature 273.15 + 30
st1 = HDXMeasurement(pmt.get_state('SecB his dimer apo'),
                     pH=8,
                     temperature=273.15 + 30)
st2 = HDXMeasurement(pmt.get_state('SecB WT apo'),
                     pH=8,
                     temperature=273.15 + 30)

guess = csv_to_protein(data_dir / 'ecSecB_guess.txt')

hdx_set = HDXMeasurementSet([st1, st2])
# The same rate guesses are supplied for both measurements
gibbs_guess = hdx_set.guess_deltaG([guess['rate'], guess['rate']])
# The alignment is added after the guess is made and before the aligned fit
hdx_set.add_alignment(list(mock_alignment.values()))
result = fit_gibbs_global_batch_aligned(hdx_set,
                                        gibbs_guess,
                                        r1=2,
                                        r2=5,
                                        epochs=1000)

print(result.output)
Beispiel #22
0
# Example script: global fit of a single HDX measurement.

# When True the initial rates are fitted here; otherwise they are read from file
guess = False
epochs = 1000
# Path() is the current working directory, so the script expects to be run
# from a directory directly below the repository root
root_dir = Path().resolve().parent
test_data_dir = root_dir / 'tests' / 'test_data'
# NOTE(review): input_file_path is not used in the visible part of the script
input_file_path = test_data_dir / 'ecSecB_apo.csv'

# Load the data of two Dynamx files, and combine the result to one table
data = read_dynamx(test_data_dir / 'ecSecB_apo.csv',
                   test_data_dir / 'ecSecB_dimer.csv')

pmt = PeptideMasterTable(data,
                         drop_first=1,
                         ignore_prolines=True,
                         remove_nan=False)
pmt.set_control(('Full deuteration control', 0.167))
temperature, pH = 273.15 + 30, 8.
series = HDXMeasurement(pmt.get_state('SecB WT apo'),
                        temperature=temperature,
                        pH=pH)

if guess:
    # Fit initial rates by weighted averaging, using the default client
    client = default_client()
    wt_avg_result = fit_rates_weighted_average(series, client=client)
    init_guess = wt_avg_result.output
else:
    # Reuse the rates stored with the test data
    init_guess = csv_to_protein(test_data_dir / 'ecSecB_guess.txt')

# Convert the rates to an initial deltaG guess and run the global fit
gibbs_guess = series.guess_deltaG(init_guess['rate'])
fr_torch = fit_gibbs_global(series, gibbs_guess, epochs=epochs)
print(fr_torch.metadata['total_loss'])
# Script section that (re)writes fit results into the test_data directory.
# `test_data_dir`, `directory`, `control`, `sequence`, `sequence_dimer`, `guess`
# and `epochs` are defined above this section.
data = read_dynamx(test_data_dir / 'ecSecB_apo.csv', test_data_dir / 'ecSecB_dimer.csv')

pmt = PeptideMasterTable(data, drop_first=1, ignore_prolines=True, remove_nan=False)
pmt.set_control(control)
temperature, pH = 273.15 + 30, 8.

hdxm = HDXMeasurement(pmt.get_state('SecB WT apo'), sequence=sequence, temperature=temperature, pH=pH)

if guess:
    # NOTE(review): client is created but not passed to the fit call below - confirm intent
    client = default_client()
    wt_avg_result = fit_rates_weighted_average(hdxm, bounds=(1e-2, 800))
    output = wt_avg_result.output
    # Overwrites the stored guess file
    output.to_file(directory / 'test_data' / 'ecSecB_guess.txt')
else:
    # Reuse the stored guess file
    output = csv_to_protein(directory / 'test_data' / 'ecSecB_guess.txt')

# Single-measurement global fit; the result is written to ecSecB_torch_fit.txt
gibbs_guess = hdxm.guess_deltaG(output['rate'])
fr_torch = fit_gibbs_global(hdxm, gibbs_guess, epochs=epochs, r1=2)
fr_torch.output.to_file(directory / 'test_data' / 'ecSecB_torch_fit.txt')

hdxm_dimer = HDXMeasurement(pmt.get_state('SecB his dimer apo'), sequence=sequence_dimer,
                            temperature=temperature, pH=pH)

# Batch fit over both measurements, dimer first
hdx_set = HDXMeasurementSet([hdxm_dimer, hdxm])

# The same rate guesses are supplied for both measurements
gibbs_guess = hdx_set.guess_deltaG([output['rate'], output['rate']])
batch_result = fit_gibbs_global_batch(hdx_set, gibbs_guess, epochs=epochs)

# Batch output is written as csv and as pretty-printed text
batch_result.output.to_file(directory / 'test_data' / 'ecSecB_batch.csv')
batch_result.output.to_file(directory / 'test_data' / 'ecSecB_batch.txt', fmt='pprint')