def _parse_extra_args(directives, sec_structs):
    """Build the keyword arguments forwarded to the folding routine.

    Args:
        directives: The trailing entries of a run directive.  Each entry is a
            sequence of strings that, once re-joined with commas, reads
            ``key=value``.
        sec_structs: Value stored under the ``"sec_structs"`` key.

    Returns:
        A dict containing ``"sec_structs"`` plus one string-valued entry per
        directive.

    Raises:
        ValueError: If a directive contains no ``=`` separator.
    """
    extra_args = {"sec_structs": sec_structs}
    for directive in directives:
        # The pieces were split on commas upstream; restore the original text
        # so that comma-separated values (e.g. weights=1,2,3,4,5) stay intact.
        text = ','.join(directive)
        # Split on the first "=" only, so values may themselves contain "=".
        key, separator, value = text.partition("=")
        if not separator:
            raise ValueError(
                "Extra run parameter must look like key=value: {}".format(text))
        extra_args[key] = value
    return extra_args


def start_run(args):
    """Execute one run directive, dispatching to segment_fold or simulate_fold.

    The single positional argument is a 4-tuple so that the function can be
    called exactly as before (``start_run((run, seq, sec_structs, sparc_dir))``)
    while remaining valid syntax on Python 3, where tuple parameters were
    removed.

    Args:
        args: ``(run, seq, sec_structs, sparc_dir)`` where

            * ``run`` is the run directive.  ``run[0]`` holds one or two
              ``"start-end"`` range strings.  If ``run[1]`` has more than one
              element it lists input file names (joined onto ``output``),
              ``run[2][0]`` is the output name and ``run[3:]`` are extra
              ``key=value`` parameters.  Otherwise ``run[1][0]`` is the output
              name and ``run[2:]`` are the extra parameters.
            * ``seq`` is the sequence passed through to the folding routine.
            * ``sec_structs`` is forwarded as the ``sec_structs`` keyword.
            * ``sparc_dir`` is passed to ``load_dists`` and the folding routine.

    Returns:
        Whatever ``segment_fold`` or ``simulate_fold`` returns.

    Raises:
        AssertionError: If the directive has fewer than two entries, or a
            ``weights`` parameter does not list exactly five values.
        ValueError: If an extra parameter is not of the form ``key=value``.

    Note:
        ``output`` is read here but is not defined in this function; it is
        presumably a module-level variable -- confirm against the rest of
        the file.
    """
    run, seq, sec_structs, sparc_dir = args
    assert len(run) >= 2, "Run directive is invalid: {}".format(run)

    weights = {
        "consec": 3.0,
        "secondary": 3.0,
        "short_range": 2.0,
        "long_range": 2.0,
        "medium": 3.0,
    }
    distributions = load_dists(sparc_dir, concurrent=False, secondary=True, weights=weights)

    if len(run[1]) > 1:
        # Segment mode: run[1] lists the input paths and run[2] names the output.
        extra_args = _parse_extra_args(run[3:], sec_structs)
        range1 = [int(x) for x in run[0][0].split("-")]
        range2 = [int(x) for x in run[0][1].split("-")]
        infiles = [os.path.join(output, nm) for nm in run[1]]
        apply_dist_weights(distributions, {
            "consec": 1.0,
            "secondary": 1.0,
            "short_range": 3.0,
            "long_range": 5.0,
            "medium": 3.0,
        })
        return segment_fold(sparc_dir, distributions, seq, range1, range2, infiles, output,
                            outname=run[2][0], **extra_args)

    # Single-range mode: run[1] names the output.
    extra_args = _parse_extra_args(run[2:], sec_structs)
    fold_range = [int(x) for x in run[0][0].split("-")]
    if "weights" in extra_args:
        # "weights" is consumed here and must not reach simulate_fold.
        weightlist = extra_args.pop("weights").split(",")
        assert len(weightlist) == 5, "Need exactly 5 weight specifications, not {}".format(len(weightlist))
        apply_dist_weights(distributions, {
            "consec": float(weightlist[0]),
            "secondary": float(weightlist[1]),
            "short_range": float(weightlist[2]),
            "long_range": float(weightlist[3]),
            "medium": float(weightlist[4]),
        })
    elif fold_range[1] - fold_range[0] > 7:
        # Ranges spanning more than 7 positions get a different default weighting.
        apply_dist_weights(distributions, {
            "consec": 2.0,
            "secondary": 2.0,
            "short_range": 4.0,
            "long_range": 5.0,
            "medium": 4.0,
        })
    return simulate_fold(sparc_dir, distributions, seq, fold_range, output,
                         outname=run[1][0], **extra_args)
def _rank_segment_combos(dists, seg_prob, seq1, seq2, candidates):
    """Score every pairing of segment conformations and keep the lowest scores.

    Each pairing of an entry from ``seg_prob.c1_conformations`` with an entry
    from ``seg_prob.c2_conformations`` is handed to ``test_segment_combo`` in
    its own child process; the child reports its result through a queue.
    Pairings whose result equals 0 are discarded.

    Returns:
        A list of ``[i, j, result]`` entries sorted by ascending result and
        truncated to at most ``candidates`` entries.
    """
    confscores = []
    queue = Queue()
    for i, conf1 in enumerate(seg_prob.c1_conformations):
        print("Testing c1 {}".format(i))
        for j, conf2 in enumerate(seg_prob.c2_conformations):
            p = Process(target=test_segment_combo,
                        args=(queue, dists, seg_prob, seq1, seq2, conf1[0], conf2[0]))
            p.start()
            # Drain the queue *before* joining: a child that has put data on a
            # queue may not terminate until that data has been consumed.
            result = queue.get()
            p.join()
            if result != 0:
                confscores.append([i, j, result])
    confscores.sort(key=lambda entry: entry[2])
    return confscores[:candidates]


def _write_scored_model(dists, peptide, system, model_idx, pdb_file, scoresfile, **score_kwargs):
    """Score the peptide with every distribution and append one model to the outputs.

    Writes a line ``"<model_idx> <tab-separated scores>"`` to ``scoresfile``
    and the PDB text for the whole system to ``pdb_file``.  ``score_kwargs``
    are forwarded to each ``dist.score`` call.

    Returns:
        The sum of the individual distribution scores.
    """
    total = 0.0
    columns = []
    for dist in dists:
        sc = dist.score(peptide, peptide.aminoacids, **score_kwargs)
        columns.append("{:.5f}".format(sc))
        total += sc
    scoresfile.write(str(model_idx) + " " + "\t".join(columns) + "\n")
    pdb_file.write(system.pdb(modelno=model_idx))
    return total


def _record_best_model(best_scores, best_models, score, peptide):
    """Insert the current conformation into the running best-model lists, in place."""
    slots = len(best_scores)
    for k in range(slots):
        if score < best_scores[k] * 1.2:
            # Shift lower-ranked entries down one slot to make room at k.
            for m in reversed(range(max(k, 1), slots)):
                best_scores[m] = best_scores[m - 1]
                best_models[m] = best_models[m - 1]
            best_scores[k] = score
            best_models[k] = [PositionZone(aa.acarbon, aa.i, aa.j, aa.k) for aa in peptide.aminoacids]
            break
        elif score <= best_scores[k]:
            break


def segment_fold(sparc_dir, dists, seq, range1, range2, infiles, output, sec_structs=None, outname="simulation_test.pdb", cluster_confs=25, sims=20, candidates=60):
    """Combine pre-computed conformations of two segments and refine each pairing.

    The function first ranks every pairing of the loaded segment
    conformations under a temporary set of distribution weights, then, with
    the caller's weights restored, builds each of the best pairings and runs
    ``sims`` constructive folding iterations on it.  Every model produced is
    appended to ``<output>/<outname>``, and its per-distribution scores to a
    sibling ``-scores.txt`` file whose last line is the elapsed refinement
    time in seconds.

    Args:
        sparc_dir: Directory containing the ``permissions`` and
            ``permissible_sequences`` entries.
        dists: Iterable of distribution objects; each must provide
            ``identifier`` and ``score(...)``.
        seq: Full sequence; the segments are the 1-based inclusive slices
            given by ``range1`` and ``range2``.
        range1: ``[start, end]`` of the first segment.
        range2: ``[start, end]`` of the second segment.
        infiles: Paths handed to ``load_cluster_conformations``; the first
            file is loaded as segment 1, the second as segment 2, and so on.
        output: Directory in which the output files are created.
        sec_structs: Optional secondary-structure specification.  If it
            contains a comma it is loaded with ``format='csv'``, otherwise
            with ``format='pdb'``.
        outname: File name of the PDB output.
        cluster_confs: Number of conformations to load per input file.
        sims: Number of folding iterations per candidate pairing.
        candidates: Maximum number of pairings kept after the ranking stage.
            ``cluster_confs``, ``sims`` and ``candidates`` may be given as
            strings; they are converted with ``int``.

    Returns:
        A tuple ``(outname, elapsed_seconds)``.
    """
    cluster_confs = int(cluster_confs)
    sims = int(sims)
    candidates = int(candidates)

    # Remember the caller's weights; the ranking stage uses its own.
    cache_weights = extract_dist_weights(dists)
    apply_dist_weights(dists, {"consec": 0.0, "secondary": 0.0, "short_range": 3.0, "long_range": 1.0, "medium": 4.0})

    permissions = AAPermissionsManager(os.path.join(sparc_dir, "permissions"),
                                       os.path.join(sparc_dir, "permissible_sequences", "all.txt"))
    sec_struct_permissions = AASecondaryStructurePermissionsManager(os.path.join(sparc_dir, "permissible_sequences"))
    peptide = Polypeptide()
    system = MolecularSystem([peptide])

    seq1 = seq[range1[0] - 1 : range1[1]]
    seq2 = seq[range2[0] - 1 : range2[1]]
    seg_prob = AAConstructiveProbabilitySource(peptide,
                                               (0, len(seq1)),
                                               (len(seq1), len(seq1) + len(seq2)),
                                               dists, permissions, sec_struct_permissions,
                                               system=system)
    seg_prob.steric_cutoff = 0.0
    for i, inf in enumerate(infiles):
        seg_prob.load_cluster_conformations(i + 1, inf, n=cluster_confs)

    # First, test all possible combos of the segments with a random linking orientation.
    print("Preliminary conformation testing...")
    confscores = _rank_segment_combos(dists, seg_prob, seq1, seq2, candidates)
    if confscores:
        print("The range of {} scores is {} to {}".format(len(confscores), confscores[0][2], confscores[-1][2]))
    else:
        # Nothing survived the preliminary test; the outputs will contain
        # only the score header and the timing line.
        print("No viable segment combinations were found.")
    gc.collect()
    apply_dist_weights(dists, cache_weights)

    pdb_path = os.path.join(output, outname)
    scores_path = os.path.join(output, os.path.splitext(outname)[0] + "-scores.txt")
    with open(pdb_path, 'w') as pdb_file, open(scores_path, 'w') as scoresfile:
        scoresfile.write("\t".join(dist.identifier for dist in dists) + "\n")
        pdb_model_idx = 1

        # NOTE(review): `prob` is configured but never used below; it is kept
        # in case constructing it has side effects on `peptide` -- confirm
        # and remove if not.
        prob = AAProbabilitySource(peptide, dists, permissions, sec_struct_permissions)
        prob.mode = psource_gentle_mode
        prob.steric_cutoff = 0.0

        # NOTE(review): the best-model lists are maintained but neither
        # returned nor written anywhere in this function.
        model_count = 10
        best_models = [[] for _ in range(model_count)]
        best_scores = [1000000 for _ in range(model_count)]

        started = datetime.datetime.now()
        last_idx = len(confscores) - 1
        for confidx, (i, j, score) in enumerate(confscores):
            print("Testing combination {}-{} ({})...".format(i, j, score))
            aas, hashtable = seg_prob.generate_structure_from_segments(seq1 + seq2,
                                                                       seg_prob.c1_conformations[i][0],
                                                                       seg_prob.c2_conformations[j][0])
            peptide.add_aas(aas)
            if sec_structs:
                struct_format = 'csv' if ',' in sec_structs else 'pdb'
                peptide.add_secondary_structures(sec_structs, format=struct_format, range=(range1[0], range2[1]))
            system.center()

            # The starting structure is scored without the `system` keyword,
            # exactly as before; refined structures are scored with it.
            _write_scored_model(dists, peptide, system, pdb_model_idx, pdb_file, scoresfile)
            pdb_model_idx += 1

            for _ in range(sims):
                constructive_folding_iteration(system, [seg_prob])
                system.center()
                curscore = _write_scored_model(dists, peptide, system, pdb_model_idx,
                                               pdb_file, scoresfile, system=system)
                pdb_model_idx += 1
                # Save the conformation if it is the best so far.
                _record_best_model(best_scores, best_models, curscore, peptide)

            if confidx != last_idx:
                # Reset the peptide before building the next candidate.
                del peptide.aminoacids[:]
                peptide.hashtable = None
                gc.collect()

        elapsed = (datetime.datetime.now() - started).total_seconds()
        del peptide.aminoacids[:]
        gc.collect()
        scoresfile.write("\n" + str(elapsed))

    return outname, elapsed