def test_prolines_kint(seq, expected_prolines):
    """
    Verify how kint.py handles prolines in a peptide sequence.

    The proline positions reported for the sequence must match the expected
    ones, in the same order, and the intrinsic exchange rate stored for each
    of those residues must be negative.
    """
    rates, found = calculate_kint_for_sequence(1, len(seq), seq, 300, 7)

    assert len(found) == len(expected_prolines)
    for position, expected in zip(found, expected_prolines):
        assert position == expected
        # The rate of residue number `position` is read at index
        # `position - 1` of the rate array.
        assert rates[position - 1] < 0
def test_forward_intrinsic_rates(seq, kint_englander):
    """
    Compare the forward intrinsic exchange rates computed by kint.py with
    reference values.

    The reference rates are those obtained for the same sequence with the
    Englander group Excel spreadsheet.
    """
    # Intrinsic exchange rates calculated by kint.py; the proline list that
    # is returned alongside them is not needed here.
    computed, _ = calculate_kint_for_sequence(1, len(seq), seq, 300, 7)

    for position in range(len(seq)):
        relative_error = np.abs(
            computed[position] / kint_englander[position] - 1)
        # NOTE(review): the original comment described this check as
        # "maximum difference 1%", but the threshold below is 1, i.e. a
        # relative difference of up to 100% is accepted. Confirm which of
        # the two is intended before tightening it.
        assert relative_error < 1
def predict_isotopic_envelope(ass_file, seq_file, temperature, pH, lnp_file,
                              times_file, pep, charge_state, exchange,
                              out_file, pi0_file=''):
    """
    Predict the isotopic envelope of one peptide at every time point.

    One file named ``<out_file>.<i>.isot`` is written per time point, where
    ``i`` is the index of the time point. Each file starts with a
    ``# <sequence slice>`` header line followed by one tab-separated row per
    envelope entry: entry index, mass, intensity as a percentage of the
    summed envelope, and intensity as a percentage of the largest entry.
    The last row is not terminated by a newline.

    :param ass_file: assignments file, parsed with ``read_assignments``.
    :param seq_file: sequence file, parsed with ``read_seq``.
    :param temperature: temperature; converted with ``float`` before use.
    :param pH: pH; converted with ``float`` before use.
    :param lnp_file: protection factor file, parsed with ``read_pfact``.
    :param times_file: time points file, parsed with ``read_time_points``.
    :param pep: 1-based index of the peptide in the assignments.
    :param charge_state: charge state of the peptide.
    :param exchange: ``'f'`` to use ``calculate_kint_for_sequence`` and
        ``centered_isotopic_envelope``, ``'b'`` to use
        ``calculate_kback_for_sequence`` and
        ``back_centered_isotopic_envelope``.
    :param out_file: stub for the names of the output files.
    :param pi0_file: whitespace-separated file holding the initial envelope
        (first line skipped, masses in column 1, intensities in column 2);
        only read when ``exchange`` is ``'b'``.
    :raises ValueError: if ``exchange`` is neither ``'f'`` nor ``'b'``.
    :return: None
    """
    # Fail early with a clear message instead of hitting an unbound local
    # further down when the exchange mode is not recognised.
    if exchange not in ('f', 'b'):
        raise ValueError(
            "exchange must be 'f' or 'b', got %r" % (exchange,))
    forward = exchange == 'f'

    seq = read_seq(seq_file)
    times = read_time_points(times_file)

    # Select residues involving the selected peptide
    ass = read_assignments(ass_file)
    peptide = ass[int(pep) - 1]
    start_res = peptide[1]
    end_res = peptide[2]

    # Upload kint and lnP values
    if forward:
        rate_function = calculate_kint_for_sequence
    else:
        rate_function = calculate_kback_for_sequence
    kint, _ = rate_function(1, len(seq), seq,
                            float(temperature), float(pH))
    kint = kint[start_res:end_res]
    lnP = read_pfact(lnp_file)[start_res:end_res]

    # Calculate fully protonated isotopic envelope
    if forward:
        pi0 = fully_protonated_envelope(seq[start_res:end_res + 1],
                                        z=charge_state)
        mass = list(pi0.keys())
        fr0 = list(pi0.values())
        # NOTE(review): kint has already been restricted to the peptide
        # above, so slicing it again with the same residue numbers looks
        # unintended (possibly meant to be 2 * len(kint)). Left unchanged
        # pending confirmation.
        padded_length = 2 * len(kint[start_res:end_res + 1])
        while len(mass) <= padded_length:
            mass.append(
                (mass[-1] + 1.00627 * int(charge_state)) / charge_state)
            fr0.append(0)
        # NOTE(review): looks like leftover debug output; kept so that what
        # is written to stdout does not change.
        print(mass, fr0)
        envelope_at = centered_isotopic_envelope
    else:
        # sep=r'\s+' is the documented replacement for the deprecated
        # delim_whitespace=True.
        pi0 = pd.read_csv(pi0_file, skiprows=1, header=None, sep=r'\s+')
        mass = list(pi0[1])
        u_fr0 = list(pi0[2])
        fr0 = centered_isotopic_envelope(0, kint, lnP, u_fr0)
        envelope_at = back_centered_isotopic_envelope

    header = '# ' + seq[start_res:end_res] + '\n'

    # Calculate isotopic envelopes at different times
    for i, time_point in enumerate(times):
        raw = envelope_at(time_point, kint, lnP, fr0)

        # Normalise so that the envelope sums to 100.
        total = sum(raw)
        f1 = [value / total * 100 for value in raw]

        # Build every row before touching the file so that a failure does
        # not leave a partially written output behind.
        rows = []
        if f1:
            largest = max(f1)
            rows = [
                '%d\t%5.5f\t%5.2f\t%5.2f'
                % (j, mass[j], percent, percent / largest * 100)
                for j, percent in enumerate(f1)
            ]

        with open("%s.%s.isot" % (out_file, i), 'w') as f:
            f.write(header)
            f.write('\n'.join(rows))
def run(base_dir, dexp, assignments, pfact, random_steps, time_points,
        harmonic_term, output_file, tolerance, weights, pH, temperature,
        seq, res1, resn):
    """
    Fit protection factors to the experimental data and write the results.

    The initial guess comes from ``pfact`` when given, otherwise from the
    best-scoring result of a random search when ``random_steps`` is set,
    otherwise from a default array. The fitted protection factors, the
    predicted values and their difference from the experimental values are
    written using ``output_file``, and the final value of the cost function
    is printed with and without the harmonic term.

    :param base_dir: base directory for all input files (not used in this
        function).
    :param dexp: file containing dexp values.
    :param assignments: file containing assignments of kints to dexp values.
    :param pfact: file containing pfactor values.
    :param random_steps: number of steps for random search.
    :param time_points: a list of experiment time points.
    :param harmonic_term: term to be used for harmonic cost scoring.
    :param output_file: stub for all output files.
    :param tolerance: tolerance value for minimisation convergence.
    :param weights: weights passed to the random search, the fit and the
        cost function.
    :param pH: pH passed to ``calculate_kint_for_sequence``.
    :param temperature: temperature passed to
        ``calculate_kint_for_sequence``.
    :param seq: sequence passed to ``calculate_kint_for_sequence``.
    :param res1: first residue passed to ``calculate_kint_for_sequence``.
    :param resn: last residue passed to ``calculate_kint_for_sequence``.
    :return: None
    """
    # Collect the residues covered by the assignments. The first residue of
    # an assignment is left out unless it lies below everything collected
    # so far.
    pfactor_filter = set()
    for ass in assignments:
        pfactor_filter.update(range(int(ass[1] + 1), int(ass[2]) + 1))
        if ass[1] < min(pfactor_filter):
            pfactor_filter.add(ass[1])

    kint, prolines = calculate_kint_for_sequence(res1, resn, seq,
                                                 temperature, pH)

    if pfact:
        init_array = read_pfact(pfact)
    elif random_steps:
        rand_output = do_random_search(kint, random_steps, pfactor_filter,
                                       dexp, time_points, assignments,
                                       harmonic_term, prolines, weights,
                                       seed=None)
        # The random search results are keyed by score; start from the
        # lowest one.
        init_array = rand_output[min(rand_output)]
    else:
        init_array = [
            1 if ii not in prolines or ii == 0 or ii + 1 in pfactor_filter
            else -1
            for ii in range(max(pfactor_filter))
        ]

    # Non-negative entries may vary within (0.00001, 20); entries equal to
    # -1 are pinned at -1 and any other negative entry is pinned at 0.
    bounds = [
        (0.00001, 20) if x >= 0 else (-1, -1) if x == -1 else (0, 0)
        for x in init_array
    ]

    pfit = fit_pfact(init_array, dexp, time_points, assignments,
                     harmonic_term, kint, bounds, tolerance, weights)

    write_pfact(pfit.x, output_file)
    dpred = calculate_dpred(pfit.x, time_points, kint, assignments)
    write_dpred(output_file, dpred, time_points)
    write_diff(output_file, dpred, dexp)

    final_score = cost_function(pfit.x, dexp, time_points, assignments,
                                harmonic_term, kint, weights)
    print('Final value of cost function w harm term: {}'.format(final_score))

    final_score = cost_function(pfit.x, dexp, time_points, assignments,
                                0.0, kint, weights)
    print('Final value of cost function w/o harm term: {}'.format(final_score))
config['pfact'] = read_pfact(opts.pfact) if opts.times: config['times'] = read_time_points(opts.times) if opts.seq: config['sequence'] = read_seq(opts.seq) config['res1'] = 1 config['resn'] = len(read_seq(opts.seq)) # Optional arguments if opts.out: config['output'] = opts.out else: config['output'] = None pfact = config['pfact'] assignments = read_assignments(config['assignments']) assignment_set = set() for ass in assignments: for x in range(int(ass[1]), int(ass[2]) + 1): assignment_set.add(x) kint, prolines = calculate_kint_for_sequence(config['res1'], config['resn'], config['sequence'], config['temperature'], config['pH']) dpred = calculate_dpred(pfact, config['times'], kint, assignments) write_dpred(config['output'], dpred, config['times'])
parser.add_argument("--dexp") parser.add_argument("--ass") parser.add_argument("--temp") parser.add_argument("--pH") parser.add_argument("--seq") if sys.argv[1].endswith('.json'): config = read_configuration(sys.argv[1]) else: config = {} opts = parser.parse_args() # Compulsory arguments if opts.dexp: dexp, time_points = read_dexp(opts.dexp) if opts.ass: ass = read_assignments(opts.ass) if opts.temp: temp = float(opts.temp) if opts.pH: pH = float(opts.pH) if opts.seq: seq = read_seq(opts.seq) res1 = 1 resn = len(read_seq(opts.seq)) log.info("Running cross_validation.py") kint, prolines = calculate_kint_for_sequence(res1, resn, seq, temp, pH) cross_validate(dexp, time_points, ass, lambdas, pH, temp, seq, res1, resn)