def test_dataframe():
    "checks the dataframes produced after peak selection"

    def _frame(**kwa):
        "runs event detection, peak selection and a merged DataFrameTask"
        procs = create(utfilepath('big_selected'), EventDetectionTask(),
                       PeakSelectorTask(), DataFrameTask(merge=True, **kwa))
        return next(procs.run())

    # measures=dfevents: an 'events' column is added, holding dataframes
    frame = _frame(measures={'dfevents': True})
    assert isinstance(frame, pd.DataFrame)
    for level in ('track', 'bead'):
        assert level in frame.index.names
    for column in ('peakposition', 'events'):
        assert column in frame
    assert isinstance(frame.events[0], pd.DataFrame)

    # default measures: the index has no 'cycle' level
    frame = _frame()
    assert isinstance(frame, pd.DataFrame)
    for level in ('track', 'bead'):
        assert level in frame.index.names
    assert 'cycle' not in frame.index.names
    assert 'peakposition' in frame

    # measures=events: the index gains a 'cycle' level
    frame = _frame(measures={'events': True})
    assert isinstance(frame, pd.DataFrame)
    for level in ('track', 'bead', 'cycle'):
        assert level in frame.index.names
    assert 'peakposition' in frame
# Example no. 2
# 0
def test_toref_frompeaks():
    "checks that fits to a reference recover a known stretch and bias"

    def _firstbead():
        "returns the peaks of the first bead in 'big_selected'"
        procs   = create(utpath("big_selected"), EventDetectionTask(), PeakSelectorTask())
        perbead = {key: tuple(val) for key, val in next(iter(procs.run()))}
        return next(iter(perbead.values()))

    # histogram fit: compare the reference to a copy with altered
    # minimum value and bin width
    peaks = _firstbead()
    ref   = HistogramFit().frompeaks(peaks)
    found = HistogramFit().optimize(
        ref,
        HistogramData(ref.histogram, ref.minvalue+.01, ref.binwidth/1.01)
    )

    assert_allclose(found[1:], [1.01, .01], rtol = 5e-4, atol = 5e-4)

    # peak-position fit: compare the reference to a transformed copy
    peaks = _firstbead()
    ref   = ReferencePeaksFit().frompeaks(peaks)
    found = ReferencePeaksFit().optimize(ref, ref/1.01+0.1)
    assert_allclose(found[1:], [1.01, .1], rtol = 5e-4, atol = 1e-2)
def test_gels():
    "checks GELSPeakAlignmentTask within a pipeline and on hand-made stats"
    sim = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    tasks = (
        InMemoryTrackTask(Track(**sim)),
        EventDetectionTask(),
        PeakSelectorTask(peakalign=None),
        GELSPeakAlignmentTask(),
    )
    # smoke test: the pipeline must run up to its first output
    output = next(create(*tasks).run())
    _ = output[0]

    dtype = GELSPeakAlignmentTask.DTYPE

    # 4 rows of 10 items: each row is the same step, rolled by its row index
    step = np.array([(float(icol > 5), 0.) for icol in range(10)], dtype=dtype)
    stats = np.array([np.roll(step, irow) for irow in range(4)], dtype=dtype)
    for irow in range(4):
        stats['mean'][irow] += irow * 10

    # subtract a median-centered offset from each column
    offsets = np.arange(10, dtype='f4') * .1
    offsets -= np.median(offsets)
    for icol, offset in enumerate(offsets):
        stats['mean'][:, icol] -= offset

    aligner = tasks[-1]
    found = aligner(stats)

    expected = np.array(
        [
            -0.47142908, -0.37142903, -0.27142864, 0., 0., 0., 0.12857169,
            0.22857153, 0.32857174, 0.4285718
        ],
        dtype='f4'
    )
    assert_allclose(found, expected)
def test_minbiasalignment():
    "checks MinBiasPeakAlignmentTask within a pipeline and on hand-made stats"
    sim = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    tasks = (
        InMemoryTrackTask(Track(**sim)),
        EventDetectionTask(),
        PeakSelectorTask(peakalign=None),
        MinBiasPeakAlignmentTask(),
    )
    # smoke test: the pipeline must run up to its first output
    output = next(create(*tasks).run())
    _ = output[0]

    dtype = MinBiasPeakAlignmentTask.DTYPE

    # 4 rows of 10 items: each row is the same step, rolled by its row index
    step = np.array([(float(icol > 5), 0.) for icol in range(10)], dtype=dtype)
    stats = np.array([np.roll(step, irow) for irow in range(4)], dtype=dtype)
    for irow in range(4):
        stats['mean'][irow] += irow * 10

    # subtract a median-centered offset from each column
    offsets = np.arange(10, dtype='f4') * .1
    offsets -= np.median(offsets)
    for icol, offset in enumerate(offsets):
        stats['mean'][:, icol] -= offset

    aligner = tasks[-1]
    found = aligner(stats)

    expected = np.array(
        [
            -0.44999883, -0.34998798, -0.24997711, 0., 0., 0., 0., 0.24997902,
            0.34999132, 0.45000142
        ],
        dtype='f4'
    )
    assert_allclose(found, expected)
def test_precision():
    "checks peak positions, rates and durations found on simulated tracks"
    config = {
        'durations': [15, 30, 15, 60, 60, 200, 15, 100],
        'drift': None,
        'baseline': None,
        'framerate': 1.,
        'poisson': {
            'rates': [.05, .05, .1, .1, .2, .2],
            'sizes': [20, 10, 20, 10, 20, 10],
            'peaks': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
            'store': ['sizes'],
        },
        'seed': 0,
        'nbeads': 2,
        'ncycles': 100,
    }

    procs = create(TrackSimulatorTask(**config),
                   EventDetectionTask(filter=NonLinearFilter()),
                   PeakSelectorTask(), PeakProbabilityTask())
    frame = next(procs.run())
    sizes = frame.track.simulator[0]['sizes']  # values stored by the simulator
    found = tuple(frame[0])

    # positions: the simulated peaks preceded by one at zero
    positions = np.array([pos for pos, _ in found])
    assert_allclose(positions, [0., .1, .2, .3, .4, .5, .6],
                    rtol=1e-3, atol=1e-3)

    # from here on, the first peak is left out of the comparisons
    expected = np.sum(sizes >= 5, axis=0) / 100.  # type: ignore
    rates = np.array([prob.hybridisationrate for _, prob in found[1:]])
    assert_allclose(rates, expected, rtol=1e-3, atol=1e-3)

    expected = [np.mean(col[col >= 5]) for col in cast(np.ndarray, sizes).T]
    durations = np.array([prob.averageduration for _, prob in found[1:]])
    assert np.all(np.abs(durations - expected) < 2)
def test_control():
    "checks peak selection when run through the task controller"
    positions = [1., 5., 10., 20.]
    simulator = EventSimulatorTask(peaks=positions,
                                   brownian=.01,
                                   stretch=None,
                                   bias=None,
                                   rates=None,
                                   baselineargs=None,
                                   nbeads=2,
                                   ncycles=20)
    procs = create((simulator, PeakSelectorTask()))

    # run everything, then look at the first output only
    outputs = tuple(tuple(frame) for frame in procs.run())
    beads = outputs[0]
    assert tuple(item[0] for item in beads) == (0, 1)

    found = tuple(beads[0][1])
    assert_allclose([pos for pos, _ in found], [0.] + positions,
                    atol=.01, rtol=1e-2)
    for pos, evts in found:
        assert evts.dtype == 'O'
        assert all(evt.dtype == EVENTS_DTYPE for evt in evts)

        # over the 20 cycles, the first event's data must stay near the peak
        expected = (pos, ) * 20
        lows = [evt[0]['data'].min() for evt in evts]
        assert_allclose(lows, expected, atol=0.1)
        highs = [evt[0]['data'].max() for evt in evts]
        assert_allclose(highs, expected, atol=0.1)

    # test that things don't crash
    procs = create(utfilepath('big_selected'), EventDetectionTask(),
                   PeakSelectorTask())
    tuple(next(procs.run())[0])
def test_rescale():
    "checks which task attributes `rescale(5.)` multiplies and which it leaves as is"

    def _check(task, scaled):
        """
        Rescale `task` by 5 and compare every entry of `task.__dict__`:
        attributes named in `scaled` must be multiplied by 5, all others
        must compare equal to the original.
        """
        obj = task.rescale(5.)
        for name, value in task.__dict__.items():
            if name in scaled:
                assert abs(getattr(obj, name) - value * 5) < 1e-6
            else:
                assert getattr(obj, name) == value

    task = EventDetectionTask()
    obj = task.rescale(5.)

    # these parts of the event detection must come out unchanged
    assert obj.events.select.__getstate__() == task.events.select.__getstate__()
    dumps = pickle.dumps
    assert dumps(obj.events.split) == dumps(task.events.split)
    assert dumps(obj.events.merge.pop) == dumps(task.events.merge.pop)
    assert dumps(obj.events.merge.range) == dumps(task.events.merge.range)
    assert obj.events.merge.stats.confidence == task.events.merge.stats.confidence

    # abs() is needed here: a signed difference is below 1e-6 whenever the
    # rescaled value is too small, e.g. when no rescaling happened at all
    assert abs(obj.events.merge.stats.minprecision
               - task.events.merge.stats.minprecision * 5.) < 1e-6

    _check(ExtremumAlignmentTask(), ('delta', 'minrelax', 'pull', 'opening'))
    _check(BiasRemovalTask(), ('zerodelta', 'binsize'))
def test_baselinepeak():
    "checks that BaselinePeakTask drops the first peak"
    sim = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    tasks = (InMemoryTrackTask(Track(**sim)), EventDetectionTask(),
             PeakSelectorTask(), BaselinePeakTask())

    def _positions(lst):
        "runs the tasks and returns the peak positions of item 0"
        output = next(create(*lst).run())
        return [pos for pos, _ in output[0]]

    filtered = _positions(tasks)
    unfiltered = _positions(tasks[:-1])
    assert filtered == unfiltered[1:]
# Example no. 9
# 0
def test_hp_dataframe(record):
    "checks the dataframes produced after fitting to hairpins"

    def _frame(**kwa):
        "runs the pipeline up to a merged DataFrameTask; returns the first output"
        procs = create(
            TrackReaderTask(path = utpath("big_legacy")),
            EventDetectionTask(),
            PeakSelectorTask(),
            FitToHairpinTask(
                sequence = utpath("hairpins.fasta"),
                oligos   = "4mer",
                fit      = ChiSquareFit()
            ),
            DataFrameTask(merge = True, **kwa),
        )
        return next(iter(procs.run()))

    # with peak measures: more columns, among which a 'peaks' column
    # holding dataframes
    frame = _frame(measures = {'peaks': {'missing': True, 'peakhfsigma': True}})
    assert frame.shape == (102, 29)
    assert 'falseneg' in frame.peaks[1].status.unique()
    assert frame.index.names == ['hpin', 'track', 'bead']
    assert isinstance(frame.peaks.values[0], pd.DataFrame)
    assert 'modification' in frame.columns
    assert hasattr(frame, 'tasklist')

    # compare the first rows to the recorded values
    head = frame.drop(columns = ['peaks', 'cost', 'modification']).iloc[:5]
    assert record["withmissing"].approx(head, atol = 5e-4)
    assert record["peakswithmissing"].approx(
        frame.peaks[1].iloc[:5], atol = 5e-4
    )

    # without measures
    frame = _frame()
    assert frame.shape == (102, 26)
    assert frame.index.names == ['hpin', 'track', 'bead']
    assert 'modification' in frame.columns
    assert hasattr(frame, 'tasklist')
def test_dataframe():
    "checks the dataframe produced right after event detection"
    procs = create(utfilepath('big_selected'), EventDetectionTask(),
                   DataFrameTask(merge=True))
    frame = next(procs.run())

    assert isinstance(frame, pd.DataFrame)
    for level in ('track', 'bead', 'cycle', 'event'):
        assert level in frame.index.names
    assert 'avg' in frame
    assert 'modification' in frame.columns
    assert hasattr(frame, 'tasklist')
def test_singlestrandpeak():
    "checks non-closing cycle detection and that SingleStrandTask drops the last peak"
    sim = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    track = Track(**sim)
    tasks = (InMemoryTrackTask(track), EventDetectionTask(), PeakSelectorTask(),
             SingleStrandTask())
    untouched = tasks[:-1]  # same pipeline, without the SingleStrandTask

    # compare the cycles reported by the processor to the simulation's truth
    peaks = next(create(*untouched).run())
    found = SingleStrandProcessor().nonclosingramps(peaks, 0)
    closing = sim['truth'][0].strandclosing.duration
    limit = track.phase.duration(..., range(5))
    expected = np.where(closing >= limit)[0]
    assert set(found) == set(expected)

    def _positions(lst):
        "runs the tasks and returns the peak positions of item 0"
        output = next(create(*lst).run())
        return [pos for pos, _ in output[0]]

    filtered = _positions(tasks)
    unfiltered = _positions(untouched)
    assert filtered == unfiltered[:-1]
    def newtasks(mdl, beads=None, withhp=False):
        """
        Create a task list and add it to `mdl.tasks.tasks.tasks`.

        If `beads` is truthy, a DataSelectionTask on those beads is placed
        right after the reader. If `withhp` is truthy, a FitToHairpinTask is
        placed last.
        """
        reader = TrackReaderTask(path=utpath("big_legacy"))
        processing = [
            DataCleaningTask(),
            ClippingTask(),
            ExtremumAlignmentTask(),
            EventDetectionTask(),
            PeakSelectorTask(),
        ]

        selection = [DataSelectionTask(selected=list(beads))] if beads else []
        if withhp:
            fitting = [FitToHairpinTask(sequences=utpath("hairpins.fasta"),
                                        oligos="kmer")]
        else:
            fitting = []

        mdl.tasks.tasks.tasks.add(
            create([reader, *selection, *processing, *fitting]))
# Example no. 13
# 0
class PeakFindingBatchTemplate(BatchTemplate):
    "Template of tasks to run"
    # Class-level defaults. Their names are handed to `initdefaults` through
    # `locals()` just below, so they must stay defined before the decorator.
    alignment: Optional[ExtremumAlignmentTask] = None
    drift = [DriftTask(onbeads=True)]
    detection: Optional[EventDetectionTask] = EventDetectionTask()
    peaks: Optional[PeakSelectorTask] = PeakSelectorTask()

    @initdefaults(frozenset(locals()))
    def __init__(self, **kwa):
        super().__init__(**kwa)

    def __iter__(self) -> Iterator[Task]:
        """
        Yield the tasks in order: the alignment if truthy, every drift task,
        then the detection followed by the peaks.

        Iteration stops at the first of detection / peaks which is None: no
        peaks task is yielded when there is no detection.
        """
        if self.alignment:
            yield self.alignment
        yield from self.drift

        # both attributes are read before either one is yielded
        detection, peaks = self.detection, self.peaks
        if detection is None:
            return
        yield detection
        if peaks is not None:
            yield peaks