Example #1
0
 def extract_features(self, idx, pattern_name, pattern_start, pattern_end,
                      pattern, strand):
     """Summarise the predicted profile around one motif instance.

     A ``Seqlet`` is built from the given coordinates, resized with
     ``resize(70)`` and used to extract its region from
     ``self.dalt_preds[task]``, where ``task`` is looked up in
     ``self.profile_mapping`` by ``pattern_name``.

     Args:
         idx: Sequence name of the seqlet; also the index into
             ``self.dalt_preds[task]``.
         pattern_name: Key into ``self.profile_mapping`` and
             ``self.ref_profiles``.
         pattern_start: Start coordinate handed to ``Seqlet``.
         pattern_end: End coordinate handed to ``Seqlet``.
         pattern: Passed to ``Seqlet`` as ``name``.
         strand: Passed to ``Seqlet`` as ``strand``.

     Returns:
         ``{"pred": {...}}`` with the keys ``inside`` (sum of the extracted
         region), ``outside`` (``total - inside``), ``total`` (sum of
         ``self.dalt_preds[task][idx]``) and ``match`` (mean of
         ``symmetric_kl`` between the extracted region and the reference
         profile, or ``np.nan`` if that computation raises).
     """
     task = self.profile_mapping[pattern_name]
     reference = self.ref_profiles[pattern_name][task]

     core = Seqlet(idx, pattern_start, pattern_end,
                   name=pattern, strand=strand)
     # NOTE(review): presumably widens the seqlet to 70 positions - confirm
     # against Seqlet.resize.
     window = core.resize(70)

     task_preds = self.dalt_preds[task]
     whole = task_preds[idx]
     region = window.extract(task_preds)

     inside = region.sum()
     total = whole.sum()

     # Best-effort comparison with the reference profile: any failure is
     # reported as NaN instead of propagating.
     try:
         match = symmetric_kl(region, reference).mean()
     except Exception:
         match = np.nan

     summary = {
         "inside": inside,
         "outside": total - inside,
         "total": total,
         "match": match,
     }
     return {"pred": summary}
Example #2
0
def dfi_row2seqlet(row, motifs_inv):
    """Build a ``Seqlet`` from one motif-instance row.

    Args:
        row: Object exposing ``example_idx``, ``pattern_start``,
            ``pattern_end``, ``pattern``, ``strand`` and
            ``match_weighted_p`` attributes.
        motifs_inv: Mapping indexed by ``shorten_te_pattern(row.pattern)``;
            the looked-up value becomes the seqlet name.

    Returns:
        The new ``Seqlet`` with its ``alpha`` attribute set to
        ``row.match_weighted_p``.
    """
    example_idx = row.example_idx
    start = row.pattern_start
    end = row.pattern_end
    motif_name = motifs_inv[shorten_te_pattern(row.pattern)]

    result = Seqlet(example_idx, start, end,
                    name=motif_name, strand=row.strand)
    result.alpha = row.match_weighted_p
    return result
def dfi_row2seqlet(row, short_name=False):
    """Build a ``Seqlet`` from one motif-instance row.

    Args:
        row: Object exposing ``example_idx``, ``pattern_start``,
            ``pattern_end``, ``pattern`` and ``strand`` attributes.
        short_name: When truthy, the seqlet is named
            ``shorten_pattern(row.pattern)``; otherwise ``row.pattern`` is
            used unchanged.

    Returns:
        The newly constructed ``Seqlet``.
    """
    example_idx = row.example_idx
    start = row.pattern_start
    end = row.pattern_end

    if short_name:
        label = shorten_pattern(row.pattern)
    else:
        label = row.pattern

    return Seqlet(example_idx, start, end, name=label, strand=row.strand)
Example #4
0
        def fn(dist):
            """Plot the 'p' and 'imp' tracks for the side motif at ``dist``.

            Reads ``side_motif``, ``cstart``, ``cend``, ``p``, ``imp``,
            ``ymax`` and ``ymax_imp`` from the enclosing scope.

            Args:
                dist: Offset added to 500 to obtain the key into ``p`` and
                    ``imp``; also used as the plot title.
            """
            # NOTE(review): 500 looks like the position of the central
            # motif - confirm against the enclosing function.
            position = dist + 500
            side_start, side_end = motif_coords(side_motif, position)
            seqlets = [
                Seqlet(None, cstart, cend, "center", ""),
                Seqlet(None, side_start, side_end, "side", ""),
            ]
            # TODO - add also importance scores
            by_prefix = {"p": p[position], "imp": imp[position]}

            # TODO - order them correctly
            # For every task emit the "p/<task>" track, then "imp/<task>".
            tracks = OrderedDict()
            for task in p[position]:
                for prefix in ('p', 'imp'):
                    tracks[f"{prefix}/{task}"] = by_prefix[prefix][task]

            # "p/..." tracks use ymax, everything else ("imp/...") ymax_imp.
            ylims = [(0, ymax) if key.startswith("p") else (0, ymax_imp)
                     for key in tracks]
            plot_tracks(tracks, seqlets, title=dist, ylim=ylims)
Example #5
0
 def __getitem__(self, idx):
     """Return the extracted features for both members of a motif pair.

     Args:
         idx: Positional index of the pair in ``self.dfab``.

     Returns:
         A dict with keys ``"x"`` and ``"y"``. Each value is the result of
         ``self.extract`` for the corresponding seqlet, called with that
         member's row index as ``this_row_idx`` and the partner's as
         ``other_row_idx``.
     """
     row = self.dfab.iloc[idx]

     # Both seqlets share the same example; only coordinates and strand
     # differ between the two members.
     seqlet_x = Seqlet(seqname=row.example_idx,
                       start=row.pattern_start_x,
                       end=row.pattern_end_x,
                       name="",
                       strand=row.strand_x)
     seqlet_y = Seqlet(seqname=row.example_idx,
                       start=row.pattern_start_y,
                       end=row.pattern_end_y,
                       name="",
                       strand=row.strand_y)

     features_x = self.extract(seqlet_x,
                               this_row_idx=row.row_idx_x,
                               other_row_idx=row.row_idx_y,
                               motif_pair_idx=row.motif_pair_idx)
     features_y = self.extract(seqlet_y,
                               this_row_idx=row.row_idx_y,
                               other_row_idx=row.row_idx_x,
                               motif_pair_idx=row.motif_pair_idx)
     return {"x": features_x, "y": features_y}
Example #6
0
    def get_change(self, mutated_seqlet_idx, signal_seqlet_idx):
        """Collect inside/outside signal summaries for a seqlet, ref vs alt.

        The seqlet is taken from row ``signal_seqlet_idx`` of ``self.dfi``.
        "ref" values are read from ``self.preds``, ``self.profiles`` and
        ``self.ref_imp_scores_contrib`` at the seqlet's ``example_idx``;
        "alt" values are read from ``self.alt_preds`` and
        ``self.alt_imp_scores_contrib`` at ``mutated_seqlet_idx``.

        Prediction and observed-profile tracks are summarised over the
        seqlet resized with ``resize(70)``; importance-score tracks
        (``"<task>/weighted"`` and ``"<task>/count"``) over the original
        seqlet.

        Args:
            mutated_seqlet_idx: Row of ``self.dfi`` for the mutated seqlet;
                also the index into the alt arrays.
            signal_seqlet_idx: Row of ``self.dfi`` for the seqlet whose
                signal is measured.

        Returns:
            Nested dict ``{"ref": {...}, "alt": {...}}``. ``ref`` holds
            ``obs``, ``pred``, ``imp`` and ``impcount``; ``alt`` holds
            ``pred``, ``imp`` and ``impcount``. Every entry has ``inside``
            and ``outside`` sums, and the ``pred`` entries also carry
            ``match`` (mean ``symmetric_kl`` against the reference profile,
            or ``np.nan`` if that computation raises).

        Raises:
            AssertionError: If the signal row's ``row_idx`` differs from
                ``signal_seqlet_idx``, or the two rows have different
                ``example_idx`` values.
        """

        def inside_outside(track, seqlet, row):
            # Returns (extracted region, sum inside it, sum of the rest).
            whole = track[row]
            region = seqlet.extract(track)
            inside = region.sum()
            return region, inside, whole.sum() - inside

        def match_to_reference(region):
            # Best-effort: any failure is reported as NaN.
            try:
                return symmetric_kl(region, ref_profile).mean()
            except Exception:
                return np.nan

        signal = self.dfi.iloc[signal_seqlet_idx]
        assert signal.row_idx == signal_seqlet_idx

        task = self.profile_mapping[signal.pattern_name]
        ref_profile = self.ref_profiles[signal.pattern_name][task]

        mutated = self.dfi.iloc[mutated_seqlet_idx]
        assert signal.example_idx == mutated.example_idx

        narrow = Seqlet(signal.example_idx,
                        signal.pattern_start,
                        signal.pattern_end,
                        name=signal.pattern,
                        strand=signal.strand)
        wide = narrow.resize(70)

        weighted_key = f"{task}/weighted"
        count_key = f"{task}/count"
        ref_row = signal.example_idx

        # --- ref: predictions, observed profile, importance scores ---
        ref_pred_region, ref_pred_in, ref_pred_out = inside_outside(
            self.preds[task], wide, ref_row)
        _, ref_obs_in, ref_obs_out = inside_outside(
            self.profiles[task], wide, ref_row)
        ref_pred_match = match_to_reference(ref_pred_region)
        _, ref_imp_in, ref_imp_out = inside_outside(
            self.ref_imp_scores_contrib[weighted_key], narrow, ref_row)
        _, ref_impc_in, ref_impc_out = inside_outside(
            self.ref_imp_scores_contrib[count_key], narrow, ref_row)

        # --- alt: re-point both seqlets at the mutated sequence ---
        narrow.seqname = mutated_seqlet_idx
        wide.seqname = mutated_seqlet_idx

        alt_pred_region, alt_pred_in, alt_pred_out = inside_outside(
            self.alt_preds[task], wide, mutated_seqlet_idx)
        alt_pred_match = match_to_reference(alt_pred_region)
        _, alt_imp_in, alt_imp_out = inside_outside(
            self.alt_imp_scores_contrib[weighted_key], narrow,
            mutated_seqlet_idx)
        _, alt_impc_in, alt_impc_out = inside_outside(
            self.alt_imp_scores_contrib[count_key], narrow,
            mutated_seqlet_idx)

        ref_summary = {
            "obs": {
                "inside": ref_obs_in,
                "outside": ref_obs_out
            },
            "pred": {
                "inside": ref_pred_in,
                "outside": ref_pred_out,
                "match": ref_pred_match
            },
            "imp": {
                "inside": ref_imp_in,
                "outside": ref_imp_out,
            },
            "impcount": {
                "inside": ref_impc_in,
                "outside": ref_impc_out,
            },
        }
        alt_summary = {
            "pred": {
                "inside": alt_pred_in,
                "outside": alt_pred_out,
                "match": alt_pred_match
            },
            "imp": {
                "inside": alt_imp_in,
                "outside": alt_imp_out,
            },
            "impcount": {
                "inside": alt_impc_in,
                "outside": alt_impc_out,
            },
        }
        return {"ref": ref_summary, "alt": alt_summary}