def get_dcalc(assignments_file, kint_file, pfact_file, time_points_file,
              fragment_number):
    """Compute the calculated values of one fragment at every time point.

    The assignments, rates, protection factors and time points are loaded
    from the given files.  The fragment is picked by its 1-based position
    ``fragment_number`` in the assignments; entries 1 and 2 of that
    assignment are used as the slice bounds into the rate and protection
    factor arrays.

    :param assignments_file: file passed to ``read_assignments``.
    :param kint_file: file passed to ``read_kint``.
    :param pfact_file: file passed to ``read_pfact``.
    :param time_points_file: file passed to ``read_time_points``.
    :param fragment_number: 1-based index of the fragment (converted with
        ``int``).
    :return: 2-D array with one row per time point.  Column 0 holds the time
        point itself; the remaining columns hold
        ``1 - exp(-k * 60 * t / p)`` for each element of the slice.
    """
    assignments = read_assignments(assignments_file)
    kint = read_kint(kint_file, -1)
    pfact = read_pfact(pfact_file)
    time_points = read_time_points(time_points_file)

    fragment = np.array(assignments[int(fragment_number) - 1])
    first, last = fragment[1], fragment[2]

    # One extra column: the time point is stored in front of the values.
    dcalc_all = np.zeros((len(time_points), last - first + 1))

    # The slices do not depend on the time point, so take them once.
    rates = kint[first:last]
    protection = pfact[first:last]

    # NOTE(review): the factor 60 presumably converts minutes to seconds --
    # confirm the units of the time points and of the rates.
    for row, t in enumerate(time_points):
        values = 1.0 - np.exp(-rates * 60 * t / protection)
        dcalc_all[row] = np.insert(values, 0, t)
    return dcalc_all
def predict_isotopic_envelope(ass_file, seq_file, temperature, pH, lnp_file,
                              times_file, pep, charge_state, exchange,
                              out_file, pi0_file=''):
    """Predict the isotopic envelope of one peptide at every time point.

    For each time point ``i`` a file named ``<out_file>.<i>.isot`` is
    written.  Its first line is ``# `` followed by ``seq[start:end]``; every
    following line holds four tab-separated columns: the peak index, the
    corresponding mass value, the envelope normalised so that it sums to 100,
    and the envelope rescaled so that its highest peak is 100.  The last line
    has no trailing newline.

    :param ass_file: assignments file, read with ``read_assignments``.
    :param seq_file: sequence file, read with ``read_seq``.
    :param temperature: temperature, converted to ``float``.
    :param pH: pH, converted to ``float``.
    :param lnp_file: file read with ``read_pfact``; the values are passed to
        the envelope functions as ``lnP``.
    :param times_file: file read with ``read_time_points``.
    :param pep: 1-based index of the peptide in the assignments.
    :param charge_state: charge state of the peptide.
    :param exchange: ``'f'`` (rates from ``calculate_kint_for_sequence``,
        envelope from ``centered_isotopic_envelope``) or ``'b'`` (rates from
        ``calculate_kback_for_sequence``, envelope from
        ``back_centered_isotopic_envelope``).
    :param out_file: prefix of the output files.
    :param pi0_file: only read when ``exchange == 'b'``; whitespace-separated
        table whose first line is skipped, with masses in column 1 and the
        starting envelope in column 2.
    :raises ValueError: if ``exchange`` is neither ``'f'`` nor ``'b'``.
    """
    # Fail early with a clear message; previously an unknown mode surfaced
    # later as a NameError on an unassigned local.
    if exchange not in ('f', 'b'):
        raise ValueError("exchange must be 'f' or 'b', got %r" % (exchange,))
    forward = exchange == 'f'

    seq = read_seq(seq_file)
    times = read_time_points(times_file)

    # Select residues involving the selected peptide
    peptide = read_assignments(ass_file)[int(pep) - 1]
    start_res = peptide[1]
    end_res = peptide[2]

    # Upload kint and lnP values
    rate_function = (calculate_kint_for_sequence if forward
                     else calculate_kback_for_sequence)
    kint, _ = rate_function(1, len(seq), seq, float(temperature), float(pH))
    kint = kint[start_res:end_res]
    lnP = read_pfact(lnp_file)[start_res:end_res]

    # Calculate fully protonated isotopic envelope
    if forward:
        pi0 = fully_protonated_envelope(seq[start_res:end_res + 1],
                                        z=charge_state)
        mass = list(pi0.keys())
        fr0 = list(pi0.values())
        z = int(charge_state)
        # kint is already restricted to the peptide, so its own length is
        # the bound; slicing it again with absolute residue numbers gave a
        # wrong (often zero) length for peptides away from the N-terminus.
        n_sites = len(kint)
        # NOTE(review): for z > 1 this also divides the previous mass value
        # by z -- confirm the intended spacing between padded peaks.
        while len(mass) <= 2 * n_sites:
            mass.append((mass[-1] + 1.00627 * z) / z)
            fr0.append(0)
        print(mass, fr0)
        envelope_at = centered_isotopic_envelope
    else:
        # sep=r'\s+' is the documented replacement for the deprecated
        # delim_whitespace=True.
        pi0 = pd.read_csv(pi0_file, skiprows=1, header=None, sep=r'\s+')
        mass = list(pi0[1])
        u_fr0 = list(pi0[2])
        fr0 = centered_isotopic_envelope(0, kint, lnP, u_fr0)
        envelope_at = back_centered_isotopic_envelope

    # Calculate isotopic envelopes at different times
    header = '# ' + seq[start_res:end_res] + '\n'
    for i, time_point in enumerate(times):
        raw = envelope_at(time_point, kint, lnP, fr0)
        total = sum(raw)
        f1 = [value / total * 100 for value in raw]
        peak = max(f1, default=0.0)
        rows = ['%d\t%5.5f\t%5.2f\t%5.2f'
                % (j, mass[j], value, value / peak * 100)
                for j, value in enumerate(f1)]
        with open("%s.%s.isot" % (out_file, str(i)), 'w') as f:
            f.write(header)
            # join() leaves the last row without a trailing newline.
            f.write('\n'.join(rows))
def main(argv):
    """Build the run configuration from the command line and run minimisations.

    If the first command-line argument ends with ``.json`` it is loaded with
    ``read_configuration``; the remaining arguments are then parsed as
    options.  Options given on the command line override values loaded from
    the JSON file, and defaults only fill settings that are still missing.
    ``run`` is called ``--rep`` times (once by default); when more than one
    repetition is requested the repetition index is appended to the output
    name.

    :param argv: unused; the arguments are read from ``sys.argv`` (the
        parameter is kept so existing callers keep working).
    :return: None
    """
    log.info("Running exPfact.py")

    parser = argparse.ArgumentParser()
    for flag in ("--base", "--dexp", "--ass", "--pfact", "--weights",
                 "--out", "--predict", "--times", "--tol", "--harm",
                 "--rand", "--temp", "--pH", "--seq", "--rep"):
        parser.add_argument(flag)

    # Guard against an empty command line, and keep the JSON path away from
    # argparse, which would reject it as an unrecognised argument.
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0].endswith('.json'):
        config = read_configuration(cli_args[0])
        cli_args = cli_args[1:]
    else:
        config = {}
    opts = parser.parse_args(cli_args)

    # Compulsory arguments
    if opts.base:
        config['base'] = opts.base
        print("Base directory= ", config['base'])
    else:
        config.setdefault('base', os.getcwd())
    if opts.dexp:
        config['dexp'], config['times'] = read_dexp(opts.dexp)
    if opts.ass:
        config['assignments'] = opts.ass
        print("ass= ", config['assignments'])
    if opts.temp:
        config['temperature'] = float(opts.temp)
    if opts.pH:
        config['pH'] = float(opts.pH)
    if opts.seq:
        # Read the sequence file once and reuse it for the length.
        sequence = read_seq(opts.seq)
        config['sequence'] = sequence
        config['res1'] = 1
        config['resn'] = len(sequence)

    # Optional arguments
    if opts.predict:
        config['predict'] = True
    if opts.times:
        config['time_points'] = opts.times
    if opts.pfact:
        config['pfact'] = opts.pfact
        print("pfactfile= ", config['pfact'])
    else:
        config.setdefault('pfact', None)
    if opts.out:
        config['output'] = opts.out
    else:
        config.setdefault('output', None)
    if opts.rand:
        config['do_random_search'] = True
        config['random_search_steps'] = int(opts.rand)
    else:
        config.setdefault('do_random_search', False)
        config.setdefault('random_search_steps', None)
    if opts.tol:
        config['tolerance'] = float(opts.tol)
    else:
        config.setdefault('tolerance', None)
    if opts.harm:
        config['harmonic_factor'] = float(opts.harm)
    else:
        config.setdefault('harmonic_factor', 0)
    if opts.weights:
        config['weights'] = read_dexp(opts.weights)[0]
    else:
        config.setdefault('weights', None)
    n_rep = int(opts.rep) if opts.rep else 1

    # Report every missing compulsory setting at once instead of failing
    # with a KeyError on the first one.
    compulsory = ('dexp', 'times', 'assignments', 'temperature', 'pH',
                  'sequence', 'res1', 'resn')
    missing = [key for key in compulsory if key not in config]
    if missing:
        parser.error("missing compulsory settings: " + ", ".join(missing))

    assignments = read_assignments(config['assignments'])

    for i in range(n_rep):
        log.info("Minimization %s of %s" % (str(i), str(n_rep)))
        # Only suffix the repetition index when there is a name to suffix.
        if n_rep > 1 and config['output'] is not None:
            outfile = config['output'] + str(i)
        else:
            outfile = config['output']
        run(config['base'], config['dexp'], assignments, config['pfact'],
            config['random_search_steps'], config['times'],
            config['harmonic_factor'], outfile, config['tolerance'],
            config['weights'], config['pH'], config['temperature'],
            config['sequence'], config['res1'], config['resn'])
config['pfact'] = read_pfact(opts.pfact) if opts.times: config['times'] = read_time_points(opts.times) if opts.seq: config['sequence'] = read_seq(opts.seq) config['res1'] = 1 config['resn'] = len(read_seq(opts.seq)) # Optional arguments if opts.out: config['output'] = opts.out else: config['output'] = None pfact = config['pfact'] assignments = read_assignments(config['assignments']) assignment_set = set() for ass in assignments: for x in range(int(ass[1]), int(ass[2]) + 1): assignment_set.add(x) kint, prolines = calculate_kint_for_sequence(config['res1'], config['resn'], config['sequence'], config['temperature'], config['pH']) dpred = calculate_dpred(pfact, config['times'], kint, assignments) write_dpred(config['output'], dpred, config['times'])
parser.add_argument("--dexp") parser.add_argument("--ass") parser.add_argument("--temp") parser.add_argument("--pH") parser.add_argument("--seq") if sys.argv[1].endswith('.json'): config = read_configuration(sys.argv[1]) else: config = {} opts = parser.parse_args() # Compulsory arguments if opts.dexp: dexp, time_points = read_dexp(opts.dexp) if opts.ass: ass = read_assignments(opts.ass) if opts.temp: temp = float(opts.temp) if opts.pH: pH = float(opts.pH) if opts.seq: seq = read_seq(opts.seq) res1 = 1 resn = len(read_seq(opts.seq)) log.info("Running cross_validation.py") kint, prolines = calculate_kint_for_sequence(res1, resn, seq, temp, pH) cross_validate(dexp, time_points, ass, lambdas, pH, temp, seq, res1, resn)
if i == len(limiting_residues) - 1: area = "%s-%s" % (first + 1, second + 1) else: area = "%s-%s" % (first + 1, second) areas.append(area) return areas def run_mclust(areas): """ Calls the R script multi.r in folder R, which runs mclust clustering algortithm """ for area in areas: txt = "Rscript ../R/multi.r "+area.split('-')[0]+' '+area.split('-')[1] os.system(txt) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--ass") opts = parser.parse_args() if opts.ass: ass_file = opts.ass ass = read_assignments(ass_file) areas = contiguous_areas(ass) run_mclust(areas)