def setup_class(cls):
    """Create three structured arrays plus the protein and HDX measurement fixtures."""
    # array1: r_number 3..17, constant 'apple' column
    first = np.empty(15, dtype=[('r_number', int), ('apple', float)])
    first['r_number'] = np.arange(3, 18)
    first['apple'] = np.full(15, 12.0)
    cls.array1 = first

    # array2: r_number 6..22, constant 'apple', noisy 'grapes'
    second = np.empty(17, dtype=[('r_number', int), ('apple', float), ('grapes', float)])
    second['r_number'] = np.arange(6, 23)
    second['apple'] = np.full(17, 10.0)
    second['grapes'] = 15 + np.random.rand(17)
    cls.array2 = second

    # array3: r_number 1..10, noisy 'pear' and negated noisy 'banana'
    third = np.empty(10, dtype=[('r_number', int), ('pear', float), ('banana', float)])
    third['r_number'] = np.arange(1, 11)
    third['pear'] = np.random.rand(10) + 20
    third['banana'] = -(np.random.rand(10) + 20)
    cls.array3 = third

    protein_metadata = {
        'temperature': 273.15,
        'pH': 7.5,
        'mutations': ['V123S', 'P234S'],
    }
    cls.protein = csv_to_protein(output_dir / 'ecSecB_info.csv')
    cls.protein.metadata = protein_metadata

    master_table = PeptideMasterTable(read_dynamx(input_dir / 'ecSecB_apo.csv'))
    cls.series = HDXMeasurement(master_table.get_state('SecB WT apo'), c_term=200)
def setup_class(cls):
    """Create three structured arrays plus the protein and peptide-series fixtures."""
    # array1: r_number 3..17, constant 'apple' column
    first = np.empty(15, dtype=[('r_number', int), ('apple', float)])
    first['r_number'] = np.arange(3, 18)
    first['apple'] = np.full(15, 12.0)
    cls.array1 = first

    # array2: r_number 6..22, constant 'apple', noisy 'grapes'
    second = np.empty(17, dtype=[('r_number', int), ('apple', float), ('grapes', float)])
    second['r_number'] = np.arange(6, 23)
    second['apple'] = np.full(17, 10.0)
    second['grapes'] = 15 + np.random.rand(17)
    cls.array2 = second

    # array3: r_number 1..10, noisy 'pear' and negated noisy 'banana'
    third = np.empty(10, dtype=[('r_number', int), ('pear', float), ('banana', float)])
    third['r_number'] = np.arange(1, 11)
    third['pear'] = np.random.rand(10) + 20
    third['banana'] = -(np.random.rand(10) + 20)
    cls.array3 = third

    cls.protein = txt_to_protein(directory / 'test_data' / 'simulated_data_info.txt')

    master_table = PeptideMasterTable(read_dynamx(directory / 'test_data' / 'ecSecB_apo.csv'))
    by_state = master_table.groupby_state(c_term=200)
    cls.series = by_state['SecB WT apo']
def test_drop_first_prolines(self):
    """Check exchanging-residue counts for drop_first = 1, 2, 3 with prolines ignored."""
    # Expected per-peptide length reduction for each drop_first value;
    # unmodified peptide lengths are [11 15 9 19 8 9 5].
    expected_reductions = [
        [4, 3, 2, 3, 2, 2, 1],
        [4, 3, 3, 4, 3, 2, 2],
        [4, 4, 4, 5, 4, 3, 3],
    ]
    for n_drop, reductions in zip([1, 2, 3], expected_reductions):
        table = PeptideMasterTable(self.data, drop_first=n_drop, ignore_prolines=True, remove_nan=False)
        by_state = table.groupby_state()
        assert len(by_state) == 1

        series = by_state['state1']
        assert len(series) == len(self.timepoints)

        peptides = series[3]
        # take only the exchanging residues;
        # this only holds up to the first coverage break
        assert np.sum(peptides.block_length) == len(peptides.r_number)

        sequence_lengths = np.array([len(seq) for seq in peptides.data['sequence']])
        expected_ex_residues = sequence_lengths - np.array(reductions)
        assert np.all(expected_ex_residues == peptides.data['ex_residues'])
def test_keep_prolines(self):
    """Check coverage bookkeeping when no residues are dropped and prolines are kept."""
    table = PeptideMasterTable(self.data, drop_first=0, ignore_prolines=False, remove_nan=False)
    by_state = table.groupby_state()
    assert len(by_state) == 1

    series = by_state['state1']
    assert len(series) == len(self.timepoints)

    peptides = series[3]
    span = self.end - self.start

    # residue numbers are contiguous over the full span
    assert len(peptides.r_number) == span
    assert np.all(np.diff(peptides.r_number) == 1)

    expected_blocks = [1, 4, 2, 4, 3, 2, 10, 4, 2, 3, 3, 2, 1]
    assert np.all(expected_blocks == peptides.block_length)

    peptide_lengths = peptides.data['end'] - peptides.data['start']
    assert np.all(peptide_lengths == peptides.data['ex_residues'])

    # one row per peptide, one column per residue in the span
    assert peptides.X.shape == (len(self.data) / len(self.timepoints), span)
    for row, elem in zip(peptides.X, peptides.data):
        assert np.nansum(row) == len(elem['sequence'])

    covered = self.nc_start - self.start + self.end - self.nc_end
    assert np.sum(peptides.protein['coverage']) == covered

    assert peptides.exposure == self.timepoints[3]
    assert peptides.state == 'state1'
    assert ''.join(peptides.protein['sequence']) == self.sequence
def setup_class(cls):
    """Load the ecSecB apo table, its 'SecB WT apo' series and the reference sequence."""
    csv_path = directory / 'test_data' / 'ecSecB_apo.csv'
    cls.pmt = PeptideMasterTable(read_dynamx(csv_path))
    by_state = cls.pmt.groupby_state()
    cls.series = by_state['SecB WT apo']
    cls.sequence = (
        'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQM'
        'AHCLGAYCPNILFPYARECITSMVSRGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA'
    )
def setup_class(cls):
    """Load the ecSecB apo table, apply the FD control and build the HDX measurement."""
    csv_path = directory / 'test_data' / 'ecSecB_apo.csv'
    cls.pmt = PeptideMasterTable(read_dynamx(csv_path))
    cls.pmt.set_control(('Full deuteration control', 0.167))

    state_data = cls.pmt.get_state('SecB WT apo')
    cls.temperature = 273.15 + 30
    cls.pH = 8.
    cls.series = HDXMeasurement(state_data, temperature=cls.temperature, pH=cls.pH)
def setup_class(cls):
    """Load the ecSecB apo table, its HDX measurement and the reference sequence."""
    csv_path = input_dir / 'ecSecB_apo.csv'
    cls.pmt = PeptideMasterTable(read_dynamx(csv_path))
    state_data = cls.pmt.get_state('SecB WT apo')
    cls.hdxm = HDXMeasurement(state_data)
    cls.sequence = (
        'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQM'
        'AHCLGAYCPNILFPYARECITSMVSRGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA'
    )
def setup_class(cls):
    """Load the ecSecB apo table, apply the FD control and build the HDX measurement."""
    csv_path = input_dir / 'ecSecB_apo.csv'
    cls.pmt = PeptideMasterTable(read_dynamx(csv_path))
    cls.pmt.set_control(('Full deuteration control', 0.167 * 60))

    state_data = cls.pmt.get_state('SecB WT apo')
    cls.temperature = 273.15 + 30
    cls.pH = 8.
    cls.hdxm = HDXMeasurement(state_data, temperature=cls.temperature, pH=cls.pH)
def setup_class(cls):
    """Load the apo and dimer datasets together and expose one series per state."""
    data_dir = os.path.join(directory, 'test_data')
    apo_csv = os.path.join(data_dir, 'ecSecB_apo.csv')
    dimer_csv = os.path.join(data_dir, 'ecSecB_dimer.csv')
    combined = read_dynamx(apo_csv, dimer_csv)

    fd_control = ('Full deuteration control', 0.167)
    cls.temperature = 273.15 + 30
    cls.pH = 8.

    table = PeptideMasterTable(combined, drop_first=1, ignore_prolines=True, remove_nan=False)
    table.set_control(fd_control)
    by_state = table.groupby_state()
    cls.series_apo = by_state['SecB WT apo']
    cls.series_dimer = by_state['SecB his dimer apo']
def setup_class(cls):
    """Build apo, dimer and reduced HDX measurements and start a local dask cluster.

    Sets on ``cls``: ``temperature``, ``pH``, ``hdxm_apo``, ``hdxm_dimer``,
    ``reduced_hdxm``, ``cluster`` and ``address`` (the cluster's scheduler
    address).
    """
    fpath_apo = input_dir / 'ecSecB_apo.csv'
    fpath_dimer = input_dir / 'ecSecB_dimer.csv'
    data = read_dynamx(fpath_apo, fpath_dimer)
    control = ('Full deuteration control', 0.167 * 60)

    cls.temperature, cls.pH = 273.15 + 30, 8.

    pf = PeptideMasterTable(data, drop_first=1, ignore_prolines=True, remove_nan=False)
    pf.set_control(control)

    cls.hdxm_apo = HDXMeasurement(pf.get_state('SecB WT apo'), temperature=cls.temperature, pH=cls.pH)
    cls.hdxm_dimer = HDXMeasurement(pf.get_state('SecB his dimer apo'), temperature=cls.temperature, pH=cls.pH)

    # Reduced dataset: only the apo peptides whose 'end' is below 40.
    data = pf.get_state('SecB WT apo')
    reduced_data = data[data['end'] < 40]
    cls.reduced_hdxm = HDXMeasurement(reduced_data)

    # Keep the cluster handle on the class instead of dropping it as a local:
    # otherwise nothing can close it later, and the object may be garbage
    # collected while tests still connect to `cls.address`.
    cls.cluster = LocalCluster()
    cls.address = cls.cluster.scheduler_address
def setup_class(cls):
    """Load the simulated uptake data with zero back-exchange and pick the 'state1' series."""
    cls.fpath = directory / 'test_data' / 'simulated_data_uptake.csv'
    raw = read_dynamx(cls.fpath)
    cls.pmt = PeptideMasterTable(raw)
    cls.state = 'state1'
    cls.pmt.set_backexchange(0.)
    by_state = cls.pmt.groupby_state()
    cls.series = by_state[cls.state]
def test_fitting(self):
    """Weighted-average fit on the simulated data must match the stored reference within 1 %."""
    table = PeptideMasterTable(self.data, drop_first=1, ignore_prolines=True, remove_nan=False)
    table.set_backexchange(0.)
    series = table.groupby_state()['state1']

    fitter = KineticsFitting(series, bounds=(1e-2, 800))
    fit_result = fitter.weighted_avg_fit()
    fitted = fit_result.output

    reference_path = os.path.join(directory, 'test_data', 'fit_simulated_wt_avg.txt')
    reference = txt_to_protein(reference_path)

    for field in ('rate', 'k1', 'k2', 'r'):
        assert np.allclose(fitted[field], reference[field], rtol=0.01, equal_nan=True)
def test_torch_fitting(self):
    """Global fit seeded with the stored rates must reproduce the reference deltaG within 1 %."""
    table = PeptideMasterTable(self.data, drop_first=1, ignore_prolines=True, remove_nan=False)
    table.set_backexchange(0.)
    series = table.groupby_state()['state1']

    fitter = KineticsFitting(series, bounds=(1e-2, 800), temperature=300, pH=8)

    # Stored weighted-average fit result serves as the initial guess.
    guess_path = os.path.join(directory, 'test_data', 'fit_simulated_wt_avg.txt')
    initial_rates = txt_to_protein(guess_path)

    global_result = fitter.global_fit(initial_rates, epochs=1000)
    fitted = global_result.output

    reference_path = os.path.join(directory, 'test_data', 'fit_simulated_torch.txt')
    reference = txt_to_protein(reference_path)

    assert np.allclose(reference['deltaG'], fitted['deltaG'], equal_nan=True, rtol=0.01)
def generate_data(peptides, sequence, timepoints, rates, state='state1'):
    """
    Generate HDX data array

    peptides: list of (start, stop)
    sequence: string of characters
    timepoints: list of timepoints, time units the same as rates
    rates: GT rates of exchange for peptides
    state: label written to the 'state' field of every generated row

    Returns a tuple ``(cov, stacked, prot_rates)``: the Coverage object built
    from the peptides, the per-timepoint arrays stacked into one structured
    array, and ``rates`` cropped to ``cov.prot_len``.
    """
    start, end = np.array(peptides).T
    size = np.max(end - start) + 1

    # Generate a data array to create a coverage object
    dtype = [('start', int), ('end', int), ('exposure', float),
             ('state', f'U{len(state)}'), ('sequence', f'U{size}')]
    data = np.empty(len(start), dtype=dtype)
    data['start'] = start
    data['end'] = end
    data['exposure'][:] = 0.1
    # Use the requested label: the field width above is sized from `state`,
    # so a hard-coded 'state1' would ignore the argument and be truncated.
    data['state'][:] = state
    data['sequence'] = [sequence[s - 1:e] for s, e in zip(start, end)]

    pmt = PeptideMasterTable(data, drop_first=0, ignore_prolines=False, remove_nan=False)
    cov = Coverage(pmt.data)

    # crop rates to the right size
    prot_rates = rates[:cov.prot_len]

    # The per-timepoint layout does not change between iterations.
    full_dtype = dtype + [('scores', float)]

    arrays = []
    for t in timepoints:
        uptake = 1 - np.exp(-prot_rates * t)
        uptake *= 100  # percentage uptake per residue for time t
        scores = cov.X.dot(uptake)  # scores (%)

        full_data = np.empty(len(start), dtype=full_dtype)
        full_data['start'] = start
        full_data['end'] = end
        full_data['exposure'][:] = t
        full_data['state'][:] = state
        full_data['sequence'] = data['sequence']
        full_data['scores'] = scores
        arrays.append(full_data)

    stacked = stack_arrays(arrays, usemask=False, autoconvert=True)
    return cov, stacked, prot_rates
def setup_class(cls):
    """Prepare the 'SecB WT apo' HDX measurement and point the config at the test scheduler."""
    cls.fpath = input_dir / 'ecSecB_apo.csv'
    raw = read_dynamx(cls.fpath)
    cls.pmt = PeptideMasterTable(raw)

    cls.state = 'SecB WT apo'
    cls.control = ('Full deuteration control', 0.167*60)
    cls.pmt.set_control(cls.control)

    apo_data = cls.pmt.get_state(cls.state)
    cls.temperature = 273.15 + 30
    cls.pH = 8.
    cls.hdxm = HDXMeasurement(apo_data, temperature=cls.temperature, pH=cls.pH)

    settings = ConfigurationSettings()
    settings.set('cluster', 'scheduler_address', f'127.0.0.1:{test_port}')
def setup_class(cls):
    """Prepare the 'SecB WT apo' measurement, the stored fit result and the cluster port setting."""
    data_dir = directory / 'test_data'
    cls.fpath = data_dir / 'ecSecB_apo.csv'
    raw = read_dynamx(cls.fpath)
    cls.pmt = PeptideMasterTable(raw)

    cls.state = 'SecB WT apo'
    cls.control = ('Full deuteration control', 0.167)
    cls.pmt.set_control(cls.control)

    apo_data = cls.pmt.get_state(cls.state)
    cls.temperature = 273.15 + 30
    cls.pH = 8.
    cls.series = HDXMeasurement(apo_data, temperature=cls.temperature, pH=cls.pH)

    cls.prot_fit_result = csv_to_protein(data_dir / 'ecSecB_torch_fit.txt')

    settings = ConfigurationSettings()
    settings.set('cluster', 'port', str(test_port))
def setup_class(cls):
    """Prepare the 'SecB WT apo' series, the stored fit result and a DataSource wrapping it."""
    data_dir = directory / 'test_data'
    cls.fpath = data_dir / 'ecSecB_apo.csv'
    raw = read_dynamx(cls.fpath)
    cls.pmt = PeptideMasterTable(raw)

    cls.state = 'SecB WT apo'
    cls.control = ('Full deuteration control', 0.167)
    cls.pmt.set_control(cls.control)

    by_state = cls.pmt.groupby_state()
    cls.series = by_state[cls.state]

    cls.prot_fit_result = csv_to_protein(data_dir / 'ecSecB_torch_fit.txt')
    cls.ds_fit = DataSource(
        cls.prot_fit_result,
        name='global_fit',
        x='r_number',
        tags=['mapping', 'pfact', 'deltaG'],
        renderer='circle',
        size=10,
    )
# Seed both RNGs before any fitting happens.
torch.manual_seed(43)
np.random.seed(43)

# --- constants -------------------------------------------------------------
directory = Path(__file__).parent
epochs = 1000
sequence = 'XXXXTPPRILALSAPLTTMMFSASALAPKIXXXXLVIPWINGDKG'
timepoints = [0.167, 0.5, 1, 5, 10, 30, 100]
# total span of protein (inc, inc)
start = 5
end = 45
# span of no coverage area (inc, inc)
nc_start = 31
nc_end = 34
temperature = 300
pH = 8

# --- load simulated uptake data --------------------------------------------
fpath = directory / 'test_data' / 'simulated_data_uptake.csv'
data = read_dynamx(fpath)

pmt = PeptideMasterTable(
    data,
    drop_first=1,
    ignore_prolines=True,
    remove_nan=False,
)
pmt.set_backexchange(0.)
states = pmt.groupby_state()
series = states['state1']

# --- write the protein info file, then run the weighted-average fit ---------
series.cov.protein.set_k_int(temperature=temperature, pH=pH)
series.cov.protein.to_file(directory / 'test_data' / 'simulated_data_info.txt')

kf = KineticsFitting(
    series,
    bounds=(1e-2, 800),
    temperature=temperature,
    pH=pH,
)
fr1 = kf.weighted_avg_fit()
from pyhdx import read_dynamx, PeptideMasterTable from pyhdx.support import get_reduced_blocks from pyhdx.plot import plot_peptides import matplotlib.pyplot as plt import os import numpy as np data_dir = '../../tests/test_data' filename = 'ecSecB_apo.csv' fpath = os.path.join(data_dir, filename) data = read_dynamx(fpath) master_table = PeptideMasterTable(data, drop_first=0, ignore_prolines=False) states = master_table.groupby_state() print(states.keys()) series = states['SecB WT apo'] split = series.split() key = list(split)[1] cov = split[key].cov def add_blocks(ax, positions, color): for pos in positions: ax.plot([pos, pos], [-40, 2], color=color, linewidth=2) # linestyle=(0, (1, 1)) text_x = positions[:-1] + np.diff(positions) / 2 for i, x in enumerate(text_x[text_x < 58]): ax.text(x,
# Seed both RNGs before any fitting happens.
torch.manual_seed(43)
np.random.seed(43)

# --- settings ---------------------------------------------------------------
epochs = 1000
directory = Path(__file__).parent
test_data_dir = directory / 'test_data'
guess = False  # when True, redo the weighted-average fit and overwrite the stored guess
control = ('Full deuteration control', 0.167)
temperature = 273.15 + 30
pH = 8.

# --- load apo + dimer data --------------------------------------------------
data = read_dynamx(
    test_data_dir / 'ecSecB_apo.csv',
    test_data_dir / 'ecSecB_dimer.csv',
)
pf = PeptideMasterTable(
    data,
    drop_first=1,
    ignore_prolines=True,
    remove_nan=False,
)
pf.set_control(control)
states = pf.groupby_state()
series = states['SecB WT apo']

kf = KineticsFitting(
    series,
    bounds=(1e-2, 800),
    temperature=temperature,
    pH=pH,
)

if guess:
    wt_avg_result = kf.weighted_avg_fit()
    output = wt_avg_result.output
    output.to_file(test_data_dir / 'ecSecB_guess.txt')
# --- settings ---------------------------------------------------------------
epochs = 1000
sequence = 'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQMAHCLGAYCPNILFPYARECITSMVSRGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA'
sequence_dimer = 'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQMAHCLGAYCPNILFPAARECIASMVARGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA'

cwd = Path(__file__).parent
input_dir = cwd / 'test_data' / 'input'
output_dir = cwd / 'test_data' / 'output'

guess = True
control = ('Full deuteration control', 0.167 * 60)
temperature = 273.15 + 30
pH = 8.

# --- load apo + dimer data --------------------------------------------------
data = read_dynamx(
    input_dir / 'ecSecB_apo.csv',
    input_dir / 'ecSecB_dimer.csv',
)
pmt = PeptideMasterTable(
    data,
    drop_first=1,
    ignore_prolines=True,
    remove_nan=False,
)
pmt.set_control(control)

# Full apo measurement.
hdxm = HDXMeasurement(
    pmt.get_state('SecB WT apo'),
    sequence=sequence,
    temperature=temperature,
    pH=pH,
)

# Reduced apo measurement: only peptides whose 'end' is below 40.
# Note that `data` is rebound to the apo state table here.
data = pmt.get_state('SecB WT apo')
reduced_data = data[data['end'] < 40]
hdxm_reduced = HDXMeasurement(reduced_data, temperature=temperature, pH=pH)

result = fit_rates_weighted_average(hdxm_reduced)
reduced_guess = result.output
# Seed both RNGs before any fitting happens.
torch.manual_seed(43)
np.random.seed(43)

# --- settings ---------------------------------------------------------------
epochs = 1000
sequence = 'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQMAHCLGAYCPNILFPYARECITSMVSRGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA'
sequence_dimer = 'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQMAHCLGAYCPNILFPAARECIASMVARGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA'

directory = Path(__file__).parent
test_data_dir = directory / 'test_data'

guess = False  # guess true requires dask cluster at config defined ip/port
control = ('Full deuteration control', 0.167)
temperature = 273.15 + 30
pH = 8.

# --- load apo + dimer data --------------------------------------------------
data = read_dynamx(
    test_data_dir / 'ecSecB_apo.csv',
    test_data_dir / 'ecSecB_dimer.csv',
)
pmt = PeptideMasterTable(
    data,
    drop_first=1,
    ignore_prolines=True,
    remove_nan=False,
)
pmt.set_control(control)

hdxm = HDXMeasurement(
    pmt.get_state('SecB WT apo'),
    sequence=sequence,
    temperature=temperature,
    pH=pH,
)

# --- initial rates: load the stored guess unless a refit is requested -------
if not guess:
    output = csv_to_protein(test_data_dir / 'ecSecB_guess.txt')
else:
    client = default_client()
    wt_avg_result = fit_rates_weighted_average(hdxm, bounds=(1e-2, 800))
    output = wt_avg_result.output
    output.to_file(test_data_dir / 'ecSecB_guess.txt')

# --- global Gibbs fit -------------------------------------------------------
gibbs_guess = hdxm.guess_deltaG(output['rate'])
fr_torch = fit_gibbs_global(hdxm, gibbs_guess, epochs=epochs, r1=2)
from pyhdx import read_dynamx, PeptideMasterTable, KineticsFitting
from pyhdx.support import get_reduced_blocks, get_original_blocks
from pyhdx.plot import plot_peptides
import matplotlib.pyplot as plt
import os
import numpy as np

# --- settings ---------------------------------------------------------------
data_dir = '../../tests/test_data'
filename = 'ecSecB_apo.csv'
refit = False  # when True, run the weighted-average fit below

# --- load data and apply the full-deuteration control -----------------------
fpath = os.path.join(data_dir, filename)
data = read_dynamx(fpath)
master_table = PeptideMasterTable(
    data,
    drop_first=0,
    ignore_prolines=False,
)
master_table.set_control(('Full deuteration control', 0.167))

states = master_table.groupby_state()
print(states.keys())

# --- take the second entry of the split 'SecB WT apo' series ----------------
series = states['SecB WT apo']
series.make_uniform()
split = series.split()
key = list(split)[1]
series = split[key]

kf = KineticsFitting(series, bounds=(0, 200))
print(kf.bounds)

if refit:
    fr1 = kf.weighted_avg_fit()
def setup_class(cls):
    """Load the ecSecB apo table and keep its 'SecB WT apo' series."""
    csv_path = directory / 'test_data' / 'ecSecB_apo.csv'
    raw = read_dynamx(csv_path)
    cls.pmt = PeptideMasterTable(raw)
    by_state = cls.pmt.groupby_state()
    cls.series = by_state['SecB WT apo']
def setup_class(cls):
    """Load the ecSecB apo peptide table shared by the tests in this class."""
    raw = read_dynamx(input_dir / 'ecSecB_apo.csv')
    cls.pmt = PeptideMasterTable(raw)
def setup_class(cls):
    """Load the ecSecB apo peptide table shared by the tests in this class."""
    raw = read_dynamx(directory / 'test_data' / 'ecSecB_apo.csv')
    cls.pf1 = PeptideMasterTable(raw)
# `os` is used below for path handling but was missing from the imports,
# which raised NameError at `os.path.dirname(__file__)`.
import os
from pathlib import Path
import numpy as np
from pyhdx import PeptideMasterTable, KineticsFitting, read_dynamx
from pyhdx.fileIO import txt_to_np, fmt_export
import pickle

fit_dir = 'test_data'
directory = os.path.dirname(__file__)
np.random.seed(43)

# Load the ecSecB apo dataset that sits in 'test_data' next to this script.
fpath = os.path.join(directory, 'test_data', 'ecSecB_apo.csv')
data = read_dynamx(fpath)

pmt = PeptideMasterTable(data, drop_first=1, ignore_prolines=True, remove_nan=False)
pmt.set_control(('Full deuteration control', 0.167))
states = pmt.groupby_state()
series = states['SecB WT apo']
from pathlib import Path
import numpy as np
from pyhdx import PeptideMasterTable, read_dynamx, HDXMeasurement

# --- settings ---------------------------------------------------------------
current_dir = Path(__file__).parent
np.random.seed(43)
sequence = 'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQMAHCLGAYCPNILFPYARECITSMVSRGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA'

# --- load the apo dataset from the sibling 'tests' directory ----------------
fpath = current_dir.parent / 'tests' / 'test_data' / 'ecSecB_apo.csv'
data = read_dynamx(fpath)
pmt = PeptideMasterTable(
    data,
    drop_first=1,
    ignore_prolines=True,
    remove_nan=False,
)
pmt.set_control(('Full deuteration control', 0.167))

# --- build the measurement and print its protein table ----------------------
hdxm = HDXMeasurement(pmt.get_state('SecB WT apo'), sequence=sequence)
print(hdxm.coverage.protein)
# Optional export, disabled:
# hdxm.coverage.protein.to_file('test.txt', fmt='pprint')