def score_sequences(_pair, args):
    """Score one pair of aligned sequences and append the result to ``outfile``.

    The final score is a weighted sum of two components:

    * 70% substitution-matrix score: the observed BLOSUM62/gap-penalty score of
      the pairwise alignment divided by each sequence's self-match ("best
      possible") score, averaged over the two sequences.
    * 30% secondary-structure score: for every row of the first PSI-PRED table
      whose ``indx`` also occurs in the second table, the mean of
      ``1 - |difference|`` over the coil, helix and sheet probabilities;
      rows without a partner count as gaps and only lengthen the denominator.

    :param _pair: Two-item iterable ``(id1, id2)`` of record IDs
    :param args: Three-item iterable ``(alb_obj, psi_pred_files, outfile)``.
                 ``alb_obj`` is the alignment both IDs are pulled from,
                 ``psi_pred_files`` is indexed by record ID and yields tables
                 with ``indx``, ``coil_prob``, ``helix_prob`` and ``sheet_prob``
                 columns (presumably pandas DataFrames -- confirm against the
                 caller), and ``outfile`` is the path the result is appended to
    :return: None; a line ``id1,id2,final_score`` is appended to ``outfile``
    """
    id1, id2 = _pair
    alb_obj, psi_pred_files, outfile = args

    # Work on a copy so the caller's alignment is left untouched. The return
    # value of pull_records is not used; the copy itself is queried afterwards.
    id_regex = "^%s$|^%s$" % (id1, id2)
    alb_copy = Alb.make_copy(alb_obj)
    Alb.pull_records(alb_copy, id_regex)

    observed_score = 0
    seq1_best = 0
    seq2_best = 0
    seq1, seq2 = alb_copy.records()

    # The "previous" residues start as gaps, so a gap in the very first column
    # is charged gap_extend rather than gap_open.
    prev_aa1 = "-"
    prev_aa2 = "-"
    for aa_pos in range(alb_copy.lengths()[0]):
        aa1 = seq1.seq[aa_pos]
        aa2 = seq2.seq[aa_pos]

        # Best possible score for each sequence: every residue matched to itself.
        if aa1 != "-":
            seq1_best += BLOSUM62[aa1, aa1]
        if aa2 != "-":
            seq2_best += BLOSUM62[aa2, aa2]

        if aa1 == "-" or aa2 == "-":
            if prev_aa1 == "-" or prev_aa2 == "-":
                observed_score += gap_extend
            else:
                observed_score += gap_open
        else:
            observed_score += BLOSUM62[aa1, aa2]

        prev_aa1 = str(aa1)
        prev_aa2 = str(aa2)

    # Normalise the observed score against EACH sequence's own best score and
    # average the two ratios (seq2_best must be used for the second term).
    subs_mat_score = ((observed_score / seq1_best) + (observed_score / seq2_best)) / 2

    # PSI PRED comparison
    psi_pred1 = psi_pred_files[id1]
    psi_pred2 = psi_pred_files[id2]
    num_gaps = 0
    ss_score = 0
    for row1 in psi_pred1.itertuples():
        # Build the membership mask once and reuse it for the test and the lookup.
        same_indx = psi_pred2["indx"] == row1.indx
        if same_indx.any():
            # NOTE(review): float() on the selected column presumably requires
            # exactly one matching row per indx -- confirm indx is unique.
            row2 = psi_pred2.loc[same_indx]
            row_score = 0
            row_score += 1 - abs(float(row1.coil_prob) - float(row2.coil_prob))
            row_score += 1 - abs(float(row1.helix_prob) - float(row2.helix_prob))
            row_score += 1 - abs(float(row1.sheet_prob) - float(row2.sheet_prob))
            ss_score += row_score / 3
        else:
            num_gaps += 1

    align_len = len(psi_pred2) + num_gaps
    ss_score /= align_len

    final_score = (ss_score * 0.3) + (subs_mat_score * 0.7)

    # `lock` guards the append to the shared output file.
    with lock:
        with open(outfile, "a") as _ofile:
            _ofile.write("\n%s,%s,%s" % (id1, id2, final_score))
def score_sequences(seq_pair):
    """Return the raw BLOSUM45/gap-penalty score for the two records in ``seq_pair``.

    The pair is re-aligned in the context of the module-level ``seqbuddy``
    sequences: both records are removed from a copy of ``seqbuddy`` (by ID),
    appended back, aligned with MAFFT (``--globalpair``), optionally trimmed
    with trimAl ("gappyout", skipped when ``in_args.no_msa_trim`` is set), and
    the two rows of interest are then pulled out and scored column by column.

    :param seq_pair: Object whose ``records`` attribute holds exactly two records
    :return: Sum of BLOSUM45 scores for residue/residue columns plus
             ``gap_open``/``gap_extend`` for columns containing a gap
    """
    rec_a, rec_b = seq_pair.records
    id_regex = "^%s$|^%s$" % (rec_a.id, rec_b.id)

    # Rebuild the sequence set with the pair placed at the end.
    background = Sb.make_copy(seqbuddy)
    Sb.delete_records(background, id_regex)
    background = Sb.SeqBuddy(background.records + [rec_a, rec_b], out_format="gb", alpha=background.alpha)

    alignment = Alb.generate_msa(background, tool="mafft", params=" --globalpair", quiet=True)
    if not in_args.no_msa_trim:
        alignment = Alb.trimal(alignment, threshold="gappyout")
    alignment = Alb.pull_records(alignment, id_regex)

    rec_a, rec_b = alignment.records()
    total = 0
    # Starts True so that a gap in the first column is charged gap_extend.
    prev_col_gapped = True
    for col in range(alignment.lengths()[0]):
        res_a = rec_a.seq[col]
        res_b = rec_b.seq[col]

        if res_a != "-" and res_b != "-":
            total += BLOSUM45[res_a, res_b]
        elif prev_col_gapped:
            total += gap_extend
        else:
            total += gap_open

        prev_col_gapped = str(res_a) == "-" or str(res_b) == "-"

    return total
def test_pull_records(key, next_hash, alb_resources, hf):
    """Pull records matching a fixed ID pattern and check the resulting hash.

    :param key: Resource key passed to ``alb_resources.get_one``
    :param next_hash: Expected value of ``hf.buddy2hash`` after the pull
    :param alb_resources: Fixture providing the alignment via ``get_one(key)``
    :param hf: Helper fixture providing ``buddy2hash``
    """
    alignbuddy = alb_resources.get_one(key)
    Alb.pull_records(alignbuddy, "α[1-5]$|β[A-M]")
    # The assert message is only evaluated on failure; it writes the offending
    # alignment to error_files/<next_hash> (directory, separator, then file name).
    assert hf.buddy2hash(alignbuddy) == next_hash, alignbuddy.write(
        "error_files%s%s" % (os.path.sep, next_hash))