def test_dataframe():
    "checks the dataframes produced with and without event measures"
    def _frame(**kwa):
        "runs the pipeline on 'big_selected' and returns its first output"
        tasks = create(utfilepath('big_selected'),
                       EventDetectionTask(),
                       PeakSelectorTask(),
                       DataFrameTask(merge=True, **kwa))
        return next(tasks.run())

    # events stored as one dataframe per row
    frame = _frame(measures={'dfevents': True})
    assert isinstance(frame, pd.DataFrame)
    for level in ('track', 'bead'):
        assert level in frame.index.names
    for column in ('peakposition', 'events'):
        assert column in frame
    assert isinstance(frame.events[0], pd.DataFrame)

    # default measures: no 'cycle' level in the index
    frame = _frame()
    assert isinstance(frame, pd.DataFrame)
    for level in ('track', 'bead'):
        assert level in frame.index.names
    assert 'cycle' not in frame.index.names
    assert 'peakposition' in frame

    # event measures: the index has a 'cycle' level
    frame = _frame(measures={'events': True})
    assert isinstance(frame, pd.DataFrame)
    for level in ('track', 'bead', 'cycle'):
        assert level in frame.index.names
    assert 'peakposition' in frame
def test_control():
    "checks peak selection on simulated events, then runs it on a real track"
    ncycles = 20
    expected = [1., 5., 10., 20.]
    simulator = EventSimulatorTask(peaks=expected,
                                   brownian=.01,
                                   stretch=None,
                                   bias=None,
                                   rates=None,
                                   baselineargs=None,
                                   nbeads=2,
                                   ncycles=ncycles)
    pair = create((simulator, PeakSelectorTask()))

    # the run is consumed entirely before looking at its first output
    outputs = tuple(tuple(item) for item in pair.run())
    beads = outputs[0]
    assert tuple(item[0] for item in beads) == (0, 1)

    found = tuple(beads[0][1])
    positions = [pos for pos, _ in found]
    assert_allclose(positions, [0.] + expected, atol=.01, rtol=1e-2)
    for peak, evts in found:
        assert evts.dtype == 'O'
        assert all(evt.dtype == EVENTS_DTYPE for evt in evts)

        # in every cycle, the first event must stay close to the peak position
        target = (peak, ) * ncycles
        lows = [evt[0]['data'].min() for evt in evts]
        assert_allclose(lows, target, atol=0.1)
        highs = [evt[0]['data'].max() for evt in evts]
        assert_allclose(highs, target, atol=0.1)

    # test that things don't crash
    pair = create(utfilepath('big_selected'),
                  EventDetectionTask(),
                  PeakSelectorTask())
    first = next(pair.run())
    beads = tuple(first[0])
Example #3
0
def test_toref_frompeaks():
    "checks that fits recover the 1.01 factor and the offset applied to a reference"
    def _firstbead():
        "returns the peaks of the first bead found in 'big_selected'"
        pair = create(utpath("big_selected"),
                      EventDetectionTask(),
                      PeakSelectorTask())
        beads = {key: tuple(val) for key, val in next(iter(pair.run()))}
        return next(iter(beads.values()))

    # histogram fit: the reference histogram is shifted and its bins shrunk
    peaks = _firstbead()
    ref = HistogramFit().frompeaks(peaks)
    modified = HistogramData(ref.histogram,
                             ref.minvalue + .01,
                             ref.binwidth / 1.01)
    out = HistogramFit().optimize(ref, modified)
    assert_allclose(out[1:], [1.01, .01], rtol=5e-4, atol=5e-4)

    # peak fit: the reference peaks are rescaled and shifted
    peaks = _firstbead()
    ref = ReferencePeaksFit().frompeaks(peaks)
    out = ReferencePeaksFit().optimize(ref, ref / 1.01 + 0.1)
    assert_allclose(out[1:], [1.01, .1], rtol=5e-4, atol=1e-2)
def test_rescaling():
    "checks which task attributes are changed by rescaling"
    scaled = ("delta", "maxdisttozero")
    for cls in (SingleStrandTask, BaselinePeakTask):
        task = cls()
        obj = task.rescale(5.)
        assert obj is not task

        rescaled = vars(obj)
        for name, value in vars(task).items():
            if name not in scaled:
                # every other attribute is left as it was
                assert value == rescaled[name]
            else:
                assert abs(value * 5 - rescaled[name]) < 1e-5

    # rescaling a peak selector returns an equal but distinct task
    task = PeakSelectorTask()
    obj = task.rescale(5.)
    assert obj is not task
    assert obj == task
def test_gels():
    "test GELS alignment of peaks"
    data = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    track = Track(**data)
    task = GELSPeakAlignmentTask()
    lst = (InMemoryTrackTask(track), EventDetectionTask(),
           PeakSelectorTask(peakalign=None), task)
    peaks = next(create(*lst).run())
    _ = peaks[0]  # test everything runs

    dtype = GELSPeakAlignmentTask.DTYPE
    ncycles, npeaks = 10, 4

    # one row per peak: a rolled copy of the same step pattern, moved up by
    # 10 per row
    cycles = np.array([(1. if icyc > 5 else 0., 0.) for icyc in range(ncycles)],
                      dtype=dtype)
    stats = np.array([np.roll(cycles, ipk) for ipk in range(npeaks)],
                     dtype=dtype)
    for ipk in range(npeaks):
        stats['mean'][ipk] += ipk * 10

    # a median-centered bias per cycle is removed from every peak
    bias = np.arange(ncycles, dtype='f4') * .1
    bias -= np.median(bias)
    for icyc in range(ncycles):
        stats['mean'][:, icyc] -= bias[icyc]
    found = task(stats)

    expected = np.array([-0.47142908, -0.37142903, -0.27142864,
                         0., 0., 0.,
                         0.12857169, 0.22857153, 0.32857174, 0.4285718],
                        dtype='f4')
    assert_allclose(found, expected)
def test_minbiasalignment():
    "test min bias alignment of peaks"
    data = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    track = Track(**data)
    task = MinBiasPeakAlignmentTask()
    lst = (InMemoryTrackTask(track), EventDetectionTask(),
           PeakSelectorTask(peakalign=None), task)
    peaks = next(create(*lst).run())
    _ = peaks[0]  # test everything runs

    dtype = MinBiasPeakAlignmentTask.DTYPE
    ncycles, npeaks = 10, 4

    # one row per peak: a rolled copy of the same step pattern, moved up by
    # 10 per row
    cycles = np.array([(1. if icyc > 5 else 0., 0.) for icyc in range(ncycles)],
                      dtype=dtype)
    stats = np.array([np.roll(cycles, ipk) for ipk in range(npeaks)],
                     dtype=dtype)
    for ipk in range(npeaks):
        stats['mean'][ipk] += ipk * 10

    # a median-centered bias per cycle is removed from every peak
    bias = np.arange(ncycles, dtype='f4') * .1
    bias -= np.median(bias)
    for icyc in range(ncycles):
        stats['mean'][:, icyc] -= bias[icyc]
    found = task(stats)

    expected = np.array([-0.44999883, -0.34998798, -0.24997711,
                         0., 0., 0., 0.,
                         0.24997902, 0.34999132, 0.45000142],
                        dtype='f4')
    assert_allclose(found, expected)
def test_precision():
    "checks the precision on peak positions, hybridisation rates and durations"
    config = {
        'durations': [15, 30, 15, 60, 60, 200, 15, 100],
        'drift': None,
        'baseline': None,
        'framerate': 1.,
        'poisson': {
            'rates': [.05, .05, .1, .1, .2, .2],
            'sizes': [20, 10, 20, 10, 20, 10],
            'peaks': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
            'store': ['sizes'],
        },
        'seed': 0,
        'nbeads': 2,
        'ncycles': 100,
    }

    pair = create(TrackSimulatorTask(**config),
                  EventDetectionTask(filter=NonLinearFilter()),
                  PeakSelectorTask(),
                  PeakProbabilityTask())
    out = next(pair.run())
    sizes = out.track.simulator[0]['sizes']
    found = tuple(out[0])

    # positions: one peak at zero followed by the simulated ones
    positions = np.array([pos for pos, _ in found])
    assert_allclose(positions,
                    [0., .1, .2, .3, .4, .5, .6],
                    rtol=1e-3, atol=1e-3)

    # the remaining checks skip the first peak
    probs = found[1:]

    truth = np.sum(sizes >= 5, 0) / 100.  # type: ignore
    rates = np.array([prob.hybridisationrate for _, prob in probs])
    assert_allclose(rates, truth, rtol=1e-3, atol=1e-3)

    truth = [np.mean(col[col >= 5]) for col in cast(np.ndarray, sizes).T]
    durations = np.array([prob.averageduration for _, prob in probs])
    assert np.all(np.abs(durations - truth) < 2)
def test_baselinepeak():
    "checks that adding a BaselinePeakTask removes the first peak"
    def _positions(tasks):
        "runs the tasks and returns the peak positions of the first bead"
        frame = next(create(*tasks).run())
        return [pos for pos, _ in frame[0]]

    data = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    track = Track(**data)
    full = (InMemoryTrackTask(track), EventDetectionTask(), PeakSelectorTask(),
            BaselinePeakTask())

    filtered = _positions(full)
    unfiltered = _positions(full[:-1])
    assert filtered == unfiltered[1:]
Example #9
0
def test_hp_dataframe(record):
    "checks the dataframes produced after fitting peaks to hairpins"
    def _frame(**kwa):
        "runs the hairpin fit on 'big_legacy' and returns the first dataframe"
        tasks = create(
            TrackReaderTask(path=utpath("big_legacy")),
            EventDetectionTask(),
            PeakSelectorTask(),
            FitToHairpinTask(sequence=utpath("hairpins.fasta"),
                             oligos="4mer",
                             fit=ChiSquareFit()),
            DataFrameTask(merge=True, **kwa),
        )
        return next(iter(tasks.run()))

    # with per-peak measures, including the missing peaks
    measures = {'peaks': {'missing': True, 'peakhfsigma': True}}
    frame = _frame(measures=measures)
    assert frame.shape == (102, 29)
    assert 'falseneg' in frame.peaks[1].status.unique()
    assert frame.index.names == ['hpin', 'track', 'bead']
    assert isinstance(frame.peaks.values[0], pd.DataFrame)
    assert 'modification' in frame.columns
    assert hasattr(frame, 'tasklist')

    # compare the first rows with the recorded values
    summary = frame.drop(columns=['peaks', 'cost', 'modification']).iloc[:5]
    assert record["withmissing"].approx(summary, atol=5e-4)
    assert record["peakswithmissing"].approx(frame.peaks[1].iloc[:5],
                                             atol=5e-4)

    # with the default measures
    frame = _frame()
    assert frame.shape == (102, 26)
    assert frame.index.names == ['hpin', 'track', 'bead']
    assert 'modification' in frame.columns
    assert hasattr(frame, 'tasklist')
def test_singlestrandpeak():
    "checks non-closing ramp detection and the removal of the last peak"
    def _positions(tasks):
        "runs the tasks and returns the peak positions of the first bead"
        frame = next(create(*tasks).run())
        return [pos for pos, _ in frame[0]]

    data = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    track = Track(**data)
    full = (InMemoryTrackTask(track), EventDetectionTask(), PeakSelectorTask(),
            SingleStrandTask())
    base = full[:-1]

    # the ramps found must be those listed by the simulation's truth
    peaks = next(create(*base).run())
    processor = SingleStrandProcessor()
    found = processor.nonclosingramps(peaks, 0)
    closing = data['truth'][0].strandclosing.duration
    truth = np.where(closing >= track.phase.duration(..., range(5)))[0]
    assert set(found) == set(truth)

    # adding the task drops the last peak
    filtered = _positions(full)
    unfiltered = _positions(base)
    assert filtered == unfiltered[:-1]
    def newtasks(mdl, beads=None, withhp=False):
        "builds a pipeline reading 'big_legacy' and adds it to the model"
        pipeline = [
            TrackReaderTask(path=utpath("big_legacy")),
            DataCleaningTask(),
            ClippingTask(),
            ExtremumAlignmentTask(),
            EventDetectionTask(),
            PeakSelectorTask(),
        ]

        # the bead selection, if any, comes right after the reader
        if beads:
            pipeline[1:1] = [DataSelectionTask(selected=list(beads))]

        # the hairpin fit, if requested, comes last
        if withhp:
            pipeline += [
                FitToHairpinTask(sequences=utpath("hairpins.fasta"),
                                 oligos="kmer")
            ]

        mdl.tasks.tasks.tasks.add(create(pipeline))
Example #12
0
class PeakFindingBatchTemplate(BatchTemplate):
    """
    Template of tasks to run

    Iterating over an instance yields its tasks in order: the alignment task
    (if set), the drift tasks, then the detection and peak selection tasks.
    """
    # optional alignment task: `__iter__` skips it when it is falsy
    alignment: Optional[ExtremumAlignmentTask] = None
    # NOTE(review): a mutable list is used as a class-level default; presumably
    # `initdefaults` gives each instance its own copy -- confirm
    drift = [DriftTask(onbeads=True)]
    # setting either of these to None cuts the iteration short (see `__iter__`)
    detection: Optional[EventDetectionTask] = EventDetectionTask()
    peaks: Optional[PeakSelectorTask] = PeakSelectorTask()

    # `locals()` is evaluated here, inside the class body: the frozenset holds
    # the names already bound in the class namespace, which include the
    # attributes above. Presumably `initdefaults` uses them to set instance
    # attributes from defaults and keywords -- confirm. Statements added above
    # this line change what is captured.
    @initdefaults(frozenset(locals()))
    def __init__(self, **kwa):
        "forwards all keywords to the parent class"
        super().__init__(**kwa)

    def __iter__(self) -> Iterator[Task]:
        """
        Yields the tasks in running order.

        The alignment task is yielded only if truthy, then come all drift
        tasks. Detection and peak selection follow, but the iteration stops at
        the first of the two which is None: without detection, the peak
        selection is not yielded either.
        """
        if self.alignment:
            yield self.alignment
        yield from self.drift
        for i in (self.detection, self.peaks):
            if i is None:
                # a missing task ends the iteration: later tasks are dropped
                return
            yield i