Exemple #1
0
def test_prolines_kint(seq, expected_prolines):
    """ Check proline detection performed by calculate_kint_for_sequence.

    The prolines reported for the sequence must match expected_prolines
    element by element, and the intrinsic rate stored for each reported
    proline (looked up at position proline - 1) must be negative. """
    rates, found = calculate_kint_for_sequence(1, len(seq), seq, 300, 7)
    # Same number of prolines first, so the pairwise walk below covers
    # every entry of both sequences.
    assert len(found) == len(expected_prolines)
    for residue, expected in zip(found, expected_prolines):
        assert residue == expected
        # Reported prolines are shifted by one to index the rates array.
        assert rates[residue - 1] < 0
Exemple #2
0
def test_forward_intrinsic_rates(seq, kint_englander):
    """ Compare the forward intrinsic exchange rates returned by
    calculate_kint_for_sequence with the reference rates supplied in
    kint_englander (values from the Englander group excel spreadsheet),
    residue by residue. """
    # Rates computed for the whole sequence; the second return value is
    # not needed here.
    computed, _ = calculate_kint_for_sequence(1, len(seq), seq, 300, 7)
    for idx in range(len(seq)):
        ratio = computed[idx] / kint_englander[idx]
        relative_deviation = np.abs(ratio - 1)
        # NOTE(review): the threshold of 1 accepts a relative deviation of
        # up to 100%; the original comment described the limit as 1% -
        # confirm which tolerance is intended.
        assert relative_deviation < 1
Exemple #3
0
def predict_isotopic_envelope(ass_file,
                              seq_file,
                              temperature,
                              pH,
                              lnp_file,
                              times_file,
                              pep,
                              charge_state,
                              exchange,
                              out_file,
                              pi0_file=''):
    """
    Predict the isotopic envelope of one peptide at every time point and
    write one ``<out_file>.<index>.isot`` file per time point.

    :param ass_file: file read with read_assignments; columns 1 and 2 of the
        selected row are used as start and end residue of the peptide.
    :param seq_file: file read with read_seq.
    :param temperature: temperature, converted with float().
    :param pH: pH, converted with float().
    :param lnp_file: file read with read_pfact.
    :param times_file: file read with read_time_points.
    :param pep: 1-based index of the peptide in the assignments.
    :param charge_state: numeric charge state of the peptide.
    :param exchange: 'f' (rates from calculate_kint_for_sequence) or
        'b' (rates from calculate_kback_for_sequence).
    :param out_file: stub for the output file names.
    :param pi0_file: whitespace-separated file with the initial envelope;
        only read when exchange is 'b' (first row skipped, columns 1 and 2
        used as mass and intensity).
    :raises ValueError: if exchange is neither 'f' nor 'b'.
    :return: None
    """
    # Fail early: an unsupported mode would otherwise surface later as an
    # unrelated unbound-variable error (or silently do nothing).
    if exchange not in ('f', 'b'):
        raise ValueError(
            "exchange must be 'f' or 'b', got {!r}".format(exchange))

    seq = read_seq(seq_file)
    times = read_time_points(times_file)

    # Select residues involving the selected peptide
    ass = read_assignments(ass_file)
    peptide = ass[int(pep) - 1]
    start_res = peptide[1]
    end_res = peptide[2]

    # Upload kint and lnP values
    if exchange == 'f':
        kint, _ = calculate_kint_for_sequence(1, len(seq), seq,
                                              float(temperature), float(pH))
    else:
        kint, _ = calculate_kback_for_sequence(1, len(seq), seq,
                                               float(temperature), float(pH))
    kint = kint[start_res:end_res]

    lnP = read_pfact(lnp_file)[start_res:end_res]

    # Calculate fully protonated isotopic envelope
    if exchange == 'f':
        pi0 = fully_protonated_envelope(seq[start_res:end_res + 1],
                                        z=charge_state)
        mass = list(pi0.keys())
        fr0 = list(pi0.values())
        # NOTE(review): kint has already been sliced to the peptide above,
        # so this second slice looks unintended - kept as is, confirm.
        target = 2 * len(kint[start_res:end_res + 1])
        while len(mass) <= target:
            # NOTE(review): padding formula kept unchanged; verify that
            # dividing the whole sum by the charge state is intended.
            mass.append(
                (mass[-1] + 1.00627 * int(charge_state)) / charge_state)
            fr0.append(0)
            # NOTE(review): looks like leftover debug output; kept so that
            # stdout is unchanged.
            print(mass, fr0)
    else:
        # sep=r'\s+' replaces the deprecated delim_whitespace=True.
        pi0 = pd.read_csv(pi0_file,
                          skiprows=1,
                          header=None,
                          sep=r'\s+')
        mass = list(pi0[1])
        u_fr0 = list(pi0[2])
        fr0 = centered_isotopic_envelope(0, kint, lnP, u_fr0)

    # Calculate isotopic envelopes at different times
    for i, time_point in enumerate(times):
        if exchange == 'f':
            f1 = centered_isotopic_envelope(time_point, kint, lnP, fr0)
        else:
            f1 = back_centered_isotopic_envelope(time_point, kint, lnP, fr0)

        # Normalise to percentages of the total; the total and the peak are
        # computed once instead of once per element.
        total = sum(f1)
        f1 = [value / total * 100 for value in f1]
        peak = max(f1) if f1 else 0.0
        last = len(f1) - 1

        with open("%s.%s.isot" % (out_file, str(i)), 'w+') as f:
            f.write('# ' + seq[start_res:end_res] + '\n')
            for j, intensity in enumerate(f1):
                last_col = intensity / peak * 100
                line = '%d\t%5.5f\t%5.2f\t%5.2f' % (j, mass[j], intensity,
                                                    last_col)
                # No trailing newline after the final row.
                if j != last:
                    line += '\n'
                f.write(line)
Exemple #4
0
def run(base_dir, dexp, assignments, pfact, random_steps, time_points,
        harmonic_term, output_file, tolerance, weights, pH, temperature, seq,
        res1, resn):
    """
    Fit protection factors, write them together with the predicted values
    and the differences from the experiment, and print the final cost with
    and without the harmonic term.

    :param base_dir: base directory for all input files (not used inside
        this function).
    :param dexp: file containing dexp values.
    :param assignments: file containing assignments of kints to dexp values.
    :param pfact: file containing pfactor values.
    :param random_steps: number of steps for random search.
    :param time_points: a list of experiment time points.
    :param harmonic_term: term to be used for harmonic cost scoring.
    :param output_file: stub for all output files.
    :param tolerance: tolerance value for minimisation convergence.
    :param weights: passed to the random search, the fit and the cost
        function.
    :param pH: passed to calculate_kint_for_sequence.
    :param temperature: passed to calculate_kint_for_sequence.
    :param seq: passed to calculate_kint_for_sequence.
    :param res1: passed to calculate_kint_for_sequence with resn.
    :param resn: passed to calculate_kint_for_sequence with res1.
    :return: None
    """

    # Residues start+1..end of every assignment, plus the start residue
    # whenever nothing at or below it has been collected yet.
    pfactor_filter = set()
    for ass in assignments:
        pfactor_filter.update(range(int(ass[1] + 1), int(ass[2]) + 1))
        # min() raises on an empty set, which happened when the first
        # assignment contributed no residues; treat that case like
        # "nothing smaller collected yet".
        if not pfactor_filter or ass[1] < min(pfactor_filter):
            pfactor_filter.add(ass[1])

    kint, prolines = calculate_kint_for_sequence(res1, resn, seq, temperature,
                                                 pH)

    if not pfact:
        if random_steps:
            rand_output = do_random_search(kint,
                                           random_steps,
                                           pfactor_filter,
                                           dexp,
                                           time_points,
                                           assignments,
                                           harmonic_term,
                                           prolines,
                                           weights,
                                           seed=None)
            # Start the fit from the best-scoring random candidate.
            min_score = min(rand_output.keys())
            init_array = rand_output[min_score]
        else:
            init_array = [
                1 if ii not in prolines or ii == 0 or ii + 1 in pfactor_filter
                else -1 for ii in range(max(pfactor_filter))
            ]

    else:
        init_array = read_pfact(pfact)

    # Non-negative entries may vary in (0.00001, 20); -1 entries are pinned
    # at -1; any other negative entry is pinned at 0.
    bounds = [(0.00001, 20) if x >= 0 else (-1, -1) if x == -1 else (0, 0)
              for x in init_array]

    pfit = fit_pfact(init_array, dexp, time_points, assignments, harmonic_term,
                     kint, bounds, tolerance, weights)

    write_pfact(pfit.x, output_file)

    dpred = calculate_dpred(pfit.x, time_points, kint, assignments)

    write_dpred(output_file, dpred, time_points)
    write_diff(output_file, dpred, dexp)

    final_score = cost_function(pfit.x, dexp, time_points, assignments,
                                harmonic_term, kint, weights)
    print('Final value of cost function w harm term: {}'.format(final_score))
    # Same cost with the harmonic term switched off.
    final_score = cost_function(pfit.x, dexp, time_points, assignments, 0.0,
                                kint, weights)
    print('Final value of cost function w/o harm term: {}'.format(final_score))
Exemple #5
0
    config['pfact'] = read_pfact(opts.pfact)
if opts.times:
    config['times'] = read_time_points(opts.times)
if opts.seq:
    # Read the sequence file once and reuse the parsed sequence for the
    # residue count instead of reading the file a second time.
    config['sequence'] = read_seq(opts.seq)
    config['res1'] = 1
    config['resn'] = len(config['sequence'])

# Optional arguments
if opts.out:
    config['output'] = opts.out
else:
    config['output'] = None

pfact = config['pfact']
assignments = read_assignments(config['assignments'])

# Every residue covered by at least one assignment (columns 1 and 2 are
# used as inclusive start and end).
assignment_set = set()
for ass in assignments:
    for x in range(int(ass[1]), int(ass[2]) + 1):
        assignment_set.add(x)

kint, prolines = calculate_kint_for_sequence(config['res1'], config['resn'],
                                             config['sequence'],
                                             config['temperature'],
                                             config['pH'])

# Predict from the supplied protection factors and write the result.
dpred = calculate_dpred(pfact, config['times'], kint, assignments)

write_dpred(config['output'], dpred, config['times'])
Exemple #6
0
    parser.add_argument("--dexp")
    parser.add_argument("--ass")
    parser.add_argument("--temp")
    parser.add_argument("--pH")
    parser.add_argument("--seq")

    if sys.argv[1].endswith('.json'):
        config = read_configuration(sys.argv[1])
    else:
        config = {}
        opts = parser.parse_args()

        # Compulsory arguments
        if opts.dexp:
            dexp, time_points = read_dexp(opts.dexp)
        if opts.ass:
            ass = read_assignments(opts.ass)
        if opts.temp:
            temp = float(opts.temp)
        if opts.pH:
            pH = float(opts.pH)
        if opts.seq:
            seq = read_seq(opts.seq)
            res1 = 1
            resn = len(read_seq(opts.seq))

    log.info("Running cross_validation.py")
    kint, prolines = calculate_kint_for_sequence(res1, resn, seq, temp, pH)
    cross_validate(dexp, time_points, ass, lambdas, pH, temp, seq, res1, resn)