def extract_features(self, idx, pattern_name, pattern_start, pattern_end,
                     pattern, strand):
    """Summarise the ``dalt_preds`` signal around one motif instance.

    The task is looked up through ``self.profile_mapping[pattern_name]``.
    A seqlet built from the given coordinates is resized with ``resize(70)``
    and used to extract the matching region from ``self.dalt_preds[task]``.

    Args:
        idx: index of the example; used both as the seqlet's first argument
            and to index ``self.dalt_preds[task]``.
        pattern_name: key into ``self.profile_mapping`` / ``self.ref_profiles``.
        pattern_start, pattern_end: seqlet coordinates.
        pattern: value passed as the seqlet ``name``.
        strand: value passed as the seqlet ``strand``.

    Returns:
        ``{"pred": {"inside", "outside", "total", "match"}}`` where
        ``inside`` is the sum over the extracted region, ``total`` is the sum
        over ``self.dalt_preds[task][idx]``, ``outside`` is their difference
        and ``match`` is the mean of ``symmetric_kl`` between the extracted
        region and the reference profile (``np.nan`` if that call raises).
    """
    task = self.profile_mapping[pattern_name]
    reference = self.ref_profiles[pattern_name][task]

    core = Seqlet(idx, pattern_start, pattern_end, name=pattern, strand=strand)
    window = core.resize(70)

    whole_track = self.dalt_preds[task][idx]  # all predictions for this index
    window_track = window.extract(self.dalt_preds[task])

    inside = window_track.sum()
    total = whole_track.sum()
    outside = total - inside

    try:
        # Compare the extracted predictions with the reference profile.
        match = symmetric_kl(window_track, reference).mean()
    except Exception:
        # Best-effort: any failure in the comparison is reported as NaN.
        match = np.nan

    summary = {
        "inside": inside,
        "outside": outside,
        "total": total,
        "match": match,
    }
    return {"pred": summary}
def dfi_row2seqlet(row, motifs_inv):
    """Turn one instance row into a ``Seqlet`` named through ``motifs_inv``.

    Args:
        row: object exposing ``example_idx``, ``pattern_start``,
            ``pattern_end``, ``pattern``, ``strand`` and ``match_weighted_p``.
        motifs_inv: mapping indexed by ``shorten_te_pattern(row.pattern)``;
            the looked-up value becomes the seqlet name.

    Returns:
        The new ``Seqlet`` with its ``alpha`` attribute set to
        ``row.match_weighted_p``.
    """
    # Read the coordinates first so attribute access happens in the same
    # order as the positional arguments.
    coords = (row.example_idx, row.pattern_start, row.pattern_end)
    motif_name = motifs_inv[shorten_te_pattern(row.pattern)]

    result = Seqlet(*coords, name=motif_name, strand=row.strand)
    result.alpha = row.match_weighted_p
    return result
def dfi_row2seqlet(row, short_name=False):
    """Turn one instance row into a ``Seqlet``.

    Args:
        row: object exposing ``example_idx``, ``pattern_start``,
            ``pattern_end``, ``pattern`` and ``strand``.
        short_name: when truthy, the seqlet is named
            ``shorten_pattern(row.pattern)``; otherwise ``row.pattern`` is
            used unchanged.

    Returns:
        The new ``Seqlet``.
    """
    coords = (row.example_idx, row.pattern_start, row.pattern_end)

    if short_name:
        label = shorten_pattern(row.pattern)
    else:
        label = row.pattern

    return Seqlet(*coords, name=label, strand=row.strand)
def fn(dist):
    """Plot the "p" and "imp" tracks for one centre/side motif distance.

    Relies on names from the enclosing scope: ``side_motif``, ``cstart``,
    ``cend``, ``p``, ``imp``, ``ymax`` and ``ymax_imp``.

    Args:
        dist: distance value; ``dist + 500`` is used as the key into ``p`` and
            ``imp`` and passed to ``motif_coords`` (the 500 offset is
            presumably the position of the centre motif - TODO confirm).
            Also used as the plot title.

    Returns:
        None; the result is the call to ``plot_tracks``.
    """
    position = dist + 500
    side_start, side_end = motif_coords(side_motif, position)

    center_seqlet = Seqlet(None, cstart, cend, "center", "")
    side_seqlet = Seqlet(None, side_start, side_end, "side", "")
    highlighted = [center_seqlet, side_seqlet]

    # TODO - add also importance scores
    sources = {"p": p[position], "imp": imp[position]}

    # TODO - order them correctly
    # One track per (task, prefix) pair, keyed "<prefix>/<task>".
    tracks = OrderedDict()
    for task in p[position]:
        for prefix in ['p', 'imp']:
            tracks[f"{prefix}/{task}"] = sources[prefix][task]

    # "p/..." tracks share ymax; every other track uses ymax_imp.
    ylims = [(0, ymax) if key.startswith("p") else (0, ymax_imp)
             for key in tracks]

    plot_tracks(tracks, highlighted, title=dist, ylim=ylims)
def __getitem__(self, idx):
    """Return the extracted features for both members of motif pair ``idx``.

    The pair is row ``idx`` of ``self.dfab``. A seqlet is built for the
    ``_x`` and the ``_y`` member (both on ``example_idx``, with an empty
    name), and each is passed to ``self.extract`` together with its own row
    index, the other member's row index and the pair index.

    Returns:
        ``{"x": ..., "y": ...}`` holding the two ``self.extract`` results.
    """
    row = self.dfab.iloc[idx]

    seqlet_x = Seqlet(
        seqname=row.example_idx,
        start=row.pattern_start_x,
        end=row.pattern_end_x,
        name="",
        strand=row.strand_x,
    )
    seqlet_y = Seqlet(
        seqname=row.example_idx,
        start=row.pattern_start_y,
        end=row.pattern_end_y,
        name="",
        strand=row.strand_y,
    )

    features = {}
    features["x"] = self.extract(
        seqlet_x,
        this_row_idx=row.row_idx_x,
        other_row_idx=row.row_idx_y,
        motif_pair_idx=row.motif_pair_idx,
    )
    features["y"] = self.extract(
        seqlet_y,
        this_row_idx=row.row_idx_y,
        other_row_idx=row.row_idx_x,
        motif_pair_idx=row.motif_pair_idx,
    )
    return features
def get_change(self, mutated_seqlet_idx, signal_seqlet_idx):
    """Compare reference and mutated signal around one motif instance.

    The "signal" instance is row ``signal_seqlet_idx`` of ``self.dfi``. Its
    coordinates define a narrow seqlet and a wide one (``resize(70)``).
    Predictions and observed profiles are summarised over the wide seqlet,
    importance scores over the narrow one. The same summaries are then taken
    from the ``alt_*`` stores, addressed by ``mutated_seqlet_idx``.

    Args:
        mutated_seqlet_idx: row of ``self.dfi`` for the mutated instance; also
            used as the index into ``self.alt_preds`` and
            ``self.alt_imp_scores_contrib`` and as the seqlets' ``seqname``
            for the alt extraction.
        signal_seqlet_idx: row of ``self.dfi`` whose signal is measured.

    Returns:
        Nested dict::

            {"ref": {"obs":      {"inside", "outside"},
                     "pred":     {"inside", "outside", "match"},
                     "imp":      {"inside", "outside"},
                     "impcount": {"inside", "outside"}},
             "alt": {"pred":     {"inside", "outside", "match"},
                     "imp":      {"inside", "outside"},
                     "impcount": {"inside", "outside"}}}

        ``inside`` is the sum over the extracted seqlet region, ``outside``
        is the sum over the whole track minus ``inside``, and ``match`` is the
        mean ``symmetric_kl`` against the reference profile (``np.nan`` if
        that call raises).

    Raises:
        AssertionError: if ``inst.row_idx`` differs from
            ``signal_seqlet_idx``, or the two instances are not on the same
            ``example_idx``.
    """

    def inside_outside(seqlet, tracks, key):
        """Return (extracted region, {"inside": ..., "outside": ...})."""
        whole = tracks[key]
        region = seqlet.extract(tracks)
        inside = region.sum()  # sum in the seqlet region
        outside = whole.sum() - inside  # total - seqlet
        return region, {"inside": inside, "outside": outside}

    def profile_match(region):
        """Mean symmetric KL between `region` and the reference profile."""
        try:
            return symmetric_kl(region, ref_profile).mean()
        except Exception:
            # Best-effort: any failure in the comparison is reported as NaN.
            return np.nan

    inst = self.dfi.iloc[signal_seqlet_idx]
    assert inst.row_idx == signal_seqlet_idx
    task = self.profile_mapping[inst.pattern_name]
    ref_profile = self.ref_profiles[inst.pattern_name][task]

    mutated_inst = self.dfi.iloc[mutated_seqlet_idx]
    assert inst.example_idx == mutated_inst.example_idx

    narrow_seqlet = Seqlet(inst.example_idx,
                           inst.pattern_start,
                           inst.pattern_end,
                           name=inst.pattern,
                           strand=inst.strand)
    wide_seqlet = narrow_seqlet.resize(70)

    example_idx = inst.example_idx
    weighted_key = f"{task}/weighted"
    count_key = f"{task}/count"

    # --- ref: everything is addressed by the signal instance's example ---
    ref_pred_region, ref_pred = inside_outside(
        wide_seqlet, self.preds[task], example_idx)
    _, ref_obs = inside_outside(
        wide_seqlet, self.profiles[task], example_idx)
    ref_pred["match"] = profile_match(ref_pred_region)
    _, ref_imp = inside_outside(
        narrow_seqlet, self.ref_imp_scores_contrib[weighted_key], example_idx)
    _, ref_impcount = inside_outside(
        narrow_seqlet, self.ref_imp_scores_contrib[count_key], example_idx)

    # --- alt: the alt stores are addressed by the mutated seqlet index, so
    # re-point both seqlets before extracting from them ---
    narrow_seqlet.seqname = mutated_seqlet_idx
    wide_seqlet.seqname = mutated_seqlet_idx

    alt_pred_region, alt_pred = inside_outside(
        wide_seqlet, self.alt_preds[task], mutated_seqlet_idx)
    alt_pred["match"] = profile_match(alt_pred_region)
    _, alt_imp = inside_outside(
        narrow_seqlet, self.alt_imp_scores_contrib[weighted_key],
        mutated_seqlet_idx)
    _, alt_impcount = inside_outside(
        narrow_seqlet, self.alt_imp_scores_contrib[count_key],
        mutated_seqlet_idx)

    return {
        "ref": {
            "obs": ref_obs,
            "pred": ref_pred,
            "imp": ref_imp,
            "impcount": ref_impcount,
        },
        "alt": {
            "pred": alt_pred,
            "imp": alt_imp,
            "impcount": alt_impcount,
        },
    }