예제 #1
0
def test_rampdatacleaning():
    "test RampDataCleaningTask in scripting context"
    filepath = "Ramp-fov5-PBS_AfterDNApol1.pk"
    beadid = 0  # the only bead in the pk-file

    # permissive thresholds: enough cycles survive for the bead to pass
    proc_pass = create(
        TrackReaderTask(path=utpath(filepath)),
        RampDataCleaningTask(minpopulation=20,
                             extentoutlierspercentile=90,
                             minextentoutliers=90))
    # strict thresholds: the bead must be rejected with an explicit message
    proc_fail = create(
        TrackReaderTask(path=utpath(filepath)),
        RampDataCleaningTask(minpopulation=100,
                             extentoutlierspercentile=95,
                             minextentoutliers=90,
                             maxhfsigma=.0002))

    data_pass = next(iter(proc_pass.run()))

    num_valid_point = np.sum(np.isfinite(data_pass[beadid]))

    assert num_valid_point / data_pass.nframes == pytest.approx(
        10 / 22, rel=1e-4)  # 10/22 cyc pass

    data_fail = next(iter(proc_fail.run()))
    with pytest.raises(
            DataCleaningException,
            match=re.escape('22 cycles: σ[HF] > 0.0002\n' +
                            '12 cycles: Δz-outlier < 90% of Δz-consensus')):
        data_fail[beadid]
예제 #2
0
def test_message_creation():
    "test message creation"
    pipeline = create(
        TrackReaderTask(path=utpath("big_legacy")),
        DataCleaningTask()
    )
    cleaned = next(iter(pipeline.run()))
    # bead 5 is known-bad in this track: accessing it must raise
    with pytest.raises(DataCleaningException):
        cleaned[5]
예제 #3
0
def test_cleaning_dataframe():
    "test cleaning dataframe creation"
    pipeline = create(TrackReaderTask(path=utpath("big_legacy")),
                      DataCleaningTask(), DataFrameTask(merge=True))
    frame = next(iter(pipeline.run()))
    bybead = frame.reset_index(0)
    # bead 5 is flagged bad throughout, bead 0 never is
    assert list(bybead.loc[5].bad.unique()) == [True]
    assert list(bybead.loc[0].bad.unique()) == [False]
    assert 'modification' in frame.columns
    assert hasattr(frame, 'tasklist')
def test_adapt_procs_ref():
    "test processors adaptor remove ref"

    ctrls = [ProcessorController() for _ in range(3)]
    ctrls[0].add(TrackReaderTask(utpath("big_legacy")), TrackReaderProcessor)
    ctrls[0].add(_Ref(), _DummyProc)
    ctrls[1].add(TrackReaderTask(utpath("big_legacy")), TrackReaderProcessor)
    ctrls[2].add(TrackReaderTask(utpath("big_legacy")), TrackReaderProcessor)

    mdl = TasksModel()
    mdl.config.defaulttaskindex = lambda *_: appendtask
    for cnf in (mdl.config.sdi, mdl.config.picotwist):
        cnf['fittoreference'] = _Ref()
        cnf['fittohairpin']   = _Hairpin()
        cnf['dataframe']      = DataFrameTask()

    for cls in (_Ref, _Hairpin, DataFrameTask):
        mdl.state.processors[cls] = _DummyProc
    mdl.state.processors[TrackReaderTask] = TrackReaderProcessor

    for ctrl in ctrls:
        mdl.tasks.add(ctrl)
    mdl.dataframes.peaks.measures = {'events': True}

    # no reference set: every controller is adapted with a trailing DataFrameTask
    adapted = mdl.processors
    assert len(adapted) == len(ctrls)
    for ctrl in ctrls:
        model = adapted[ctrl.model[0]].model
        assert len(model) == 3
        assert ctrl.model is not model
        assert model[-1].__class__.__name__ == 'DataFrameTask'
        assert model[-1].measures == {'events': True}

    mdl.state.reference = ctrls[1].model[0]

    # reference set: every non-reference controller also gains a _Ref task
    adapted = mdl.processors
    assert len(adapted) == len(ctrls)
    for ctrl in ctrls:
        model = adapted[ctrl.model[0]].model
        isref = ctrl.model[0] is mdl.state.reference
        assert len(model) == 3 + (not isref)
        assert ctrl.model is not model
        assert model[-1].__class__.__name__ == 'DataFrameTask'
        assert model[-1].measures == {'events': True}
        if not isref:
            assert model[2].__class__.__name__ == '_Ref'
            assert model[2].defaultdata is adapted[mdl.state.reference].data
예제 #5
0
def test_hp_dataframe(record):
    "test fit to hp dataframe"
    def _run(dframe):
        "run the fit-to-hairpin pipeline ending with the given DataFrameTask"
        return next(iter(create(
            TrackReaderTask(path = utpath("big_legacy")),
            EventDetectionTask(),
            PeakSelectorTask(),
            FitToHairpinTask(
                sequence = utpath("hairpins.fasta"),
                oligos   = "4mer",
                fit      = ChiSquareFit()
            ),
            dframe,
        ).run()))

    # with the extra 'missing' & 'peakhfsigma' measures: 3 more columns
    pair = _run(DataFrameTask(merge = True, measures = dict(
        peaks = dict(missing = True, peakhfsigma = True)
    )))
    assert pair.shape == (102, 29)
    assert 'falseneg' in pair.peaks[1].status.unique()
    assert pair.index.names == ['hpin', 'track', 'bead']
    assert isinstance(pair.peaks.values[0], pd.DataFrame)
    assert 'modification' in pair.columns
    assert hasattr(pair, 'tasklist')
    assert record["withmissing"].approx(
        pair.drop(columns = ['peaks', 'cost', 'modification']).iloc[:5],
        atol = 5e-4
    )
    assert record["peakswithmissing"].approx(
        pair.peaks[1].iloc[:5], atol = 5e-4
    )

    # default measures only
    pair = _run(DataFrameTask(merge = True))
    assert pair.shape == (102, 26)
    assert pair.index.names == ['hpin', 'track', 'bead']
    assert 'modification' in pair.columns
    assert hasattr(pair, 'tasklist')
예제 #6
0
def test_fixedbeadsorting():
    "test fixed bead detection"
    import cleaning.beadsubtraction as B
    # patch the module under test with the task class used here
    B.BeadSubtractionTask = BeadSubtractionTask
    beads = next(iter(
        create(TrackReaderTask(path=utpath("fixedbeads.pk"))).run()
    ))
    found = FixedBeadDetection()(beads)
    assert len(found) == 1
    assert found[0][-1] == 4
    stats = FixedBeadDetection().dataframe(beads)
    assert stats.shape == (4, 16)
    assert set(stats[stats.good].bead.values) == {4}
예제 #7
0
def test_cycletable(bead: int, taskcount: int):
    "test rescaling"
    from eventdetection.processor import ExtremumAlignmentTask
    from cleaning.view._plot import GuiDataCleaningProcessor
    tasks: Tuple[Task, ...] = (
        TrackReaderTask(path=utpath("big_legacy")),
        DataCleaningTask(),
    )
    # taskcount selects which optional tasks are appended to the pipeline
    if taskcount in (1, 2):
        tasks += (ExtremumAlignmentTask(), )
    if taskcount in (-2, 2):
        tasks += (ClippingTask(), )

    def find(tpe):
        "return the first task of the given type, or None when absent"
        return next((i for i in tasks if isinstance(i, tpe)), None)

    ctrl = create(tasks)
    _ = next(iter(ctrl.run()))[0]  # get the track
    track = ctrl.data.getcache(tasks[0])()

    GuiDataCleaningProcessor.computeall(bead,
                                        ctrl,
                                        cleaning=find(DataCleaningTask),
                                        alignment=find(ExtremumAlignmentTask),
                                        clipping=find(ClippingTask))

    info = {
        i.name: i.values
        for i in ctrl.data.getcache(DataCleaningTask)()[bead][0]
    }
    applied_rules = set(info.keys())
    assert applied_rules == {
        'population', 'hfsigma', 'extent', 'pingpong', 'saturation',
        'discarded', 'alignment', 'clipping'
    }
    assert 'phasejump' not in applied_rules  # 'phasejump'-rule must *only* act on SDI-experiments

    # every rule reports one value per cycle
    assert all(len(i) == track.ncycles for i in info.values())

    if find(ExtremumAlignmentTask):
        nans = np.isnan(info['alignment'])
        assert nans.sum() == {0: 0, 11: 0, 15: 23}[bead]
        # cycles with no alignment value must be marked as discarded
        assert_equal(info['discarded'][nans], np.ones(nans.sum(), dtype='f4'))
    else:
        assert_equal(info['alignment'], np.zeros(track.ncycles, dtype='f4'))

    if find(ClippingTask):
        # clipping counts must never fall below the discarded counts
        assert ((info['discarded'][np.isfinite(info['clipping'])] + 1e-5) <
                info['clipping'][np.isfinite(info['clipping'])]).sum() == 0
    else:
        assert_equal(info['clipping'], np.zeros(track.ncycles, dtype='f4'))
def test_adapt_procs_fittohp():
    "test processors adaptor resolving the fit-to-hairpin task"

    procs = [ProcessorController() for _ in range(2)]
    procs[0].add(TrackReaderTask(utpath("big_legacy")), TrackReaderProcessor)
    procs[0].add(_Ref(), _DummyProc)
    procs[0].add(_Hairpin(resolve = True), _DummyProc)
    procs[1].add(TrackReaderTask(utpath("big_legacy")), TrackReaderProcessor)

    mdl = TasksModel()
    # FixedBeadDetectionTask goes right after the track reader; all else appended
    mdl.config.defaulttaskindex = lambda _1, tsk, *_2: (
        1 if isinstance(tsk, FixedBeadDetectionTask) else appendtask
    )
    mdl.state.sequences = 'a'
    for i in (mdl.config.sdi, mdl.config.picotwist):
        i['fittoreference'] = _Ref()
        i['fittohairpin']   = _Hairpin(resolve = False)
        i['dataframe']      = DataFrameTask()

    for i in (_Ref, _Hairpin, DataFrameTask):
        mdl.state.processors[i] = _DummyProc
    mdl.state.processors[TrackReaderTask] = TrackReaderProcessor

    for i in procs:
        mdl.tasks.add(i)
    mdl.dataframes.fits.measures = {'peaks': {'all': True, 'events': True}}

    lst = mdl.processors
    # only the controller holding a resolvable _Hairpin survives adaptation
    assert len(lst) == 1
    for i in procs[:1]:
        assert len(lst[i.model[0]].model) == 4
        assert i.model is not lst[i.model[0]].model
        assert lst[i.model[0]].model[2].__class__.__name__ == '_Hairpin'
        assert lst[i.model[0]].model[-1].__class__.__name__ == 'DataFrameTask'
        assert lst[i.model[0]].model[-1].measures == {'peaks': {'all': True, 'events': True}}
예제 #9
0
    def newtasks(mdl, beads=None, withhp=False):
        "add a list of tasks to the model"
        pipeline = [
            TrackReaderTask(path=utpath("big_legacy")),
            DataCleaningTask(),
            ClippingTask(),
            ExtremumAlignmentTask(),
            EventDetectionTask(),
            PeakSelectorTask()
        ]
        if beads:
            # bead selection goes right after the track reader
            pipeline.insert(1, DataSelectionTask(selected=list(beads)))
        if withhp:
            pipeline.append(FitToHairpinTask(
                sequences=utpath("hairpins.fasta"), oligos="kmer"
            ))

        mdl.tasks.tasks.tasks.add(create(pipeline))
예제 #10
0
def test_diskcache_clear(tmp_path):
    "test clearing the disk cache, both completely and per-processor"
    reader = TrackReaderTask(path=utpath("big_legacy"))
    tasks = [
        create(reader, UndersamplingTask(), DataFrameTask()),
        create(reader, DataFrameTask())
    ]
    for i, procs in enumerate(tasks):
        procs.data.setcache(DataFrameTask, {'index': i})

    cnf = DiskCacheConfig(path=str(tmp_path / "cache"))
    assert not Path(cnf.path).exists()

    cnf.insert(tasks, 10001)
    assert Path(cnf.path).exists()

    # a complete clear leaves only the version key behind
    cnf.clear(complete=True)
    with DiskCache(cnf.path) as cache:
        assert sum(1 for _ in cache.iterkeys()) == 1
        assert cache.get(VERSION_KEY) == VERSION

    for procs in tasks:
        procs.data.setcache(DataFrameTask, {'index': -1})
    cnf.insert(tasks, 10001)
    assert Path(cnf.path).exists()

    # stored entries (-1) overwrite the in-memory caches (-2) on update
    for procs in tasks:
        procs.data.setcache(DataFrameTask, {'index': -2})
    cnf.update(tasks, 10001)
    assert tasks[0].data.getcache(DataFrameTask)()['index'] == -1
    assert tasks[1].data.getcache(DataFrameTask)()['index'] == -1

    # per-processor clear removes the entries: update leaves caches untouched
    for procs in tasks:
        procs.data.setcache(DataFrameTask, {'index': -2})
    cnf.clear(processors=tasks)
    assert Path(cnf.path).exists()
    cnf.update(tasks, 10001)
    assert tasks[0].data.getcache(DataFrameTask)()['index'] == -2
    assert tasks[1].data.getcache(DataFrameTask)()['index'] == -2
예제 #11
0
def test_diskcache_insert(tmp_path):
    "test inserting into and reading back from the disk cache"
    reader = TrackReaderTask(path=utpath("big_legacy"))
    tasks = [
        create(reader, UndersamplingTask(), DataFrameTask()),
        create(reader, DataFrameTask())
    ]
    for i, procs in enumerate(tasks):
        procs.data.setcache(DataFrameTask, {'index': i})

    cnf = DiskCacheConfig(path=str(tmp_path / "cache"))
    cnf.clear()
    cnf.insert(tasks, 10001)
    assert cnf.get(tasks[0], 10001)['index'] == 0
    assert cnf.get(tasks[1], 10001)['index'] == 1

    for i, procs in enumerate(tasks):
        procs.data.setcache(DataFrameTask, {'index': i * 2 + 1})

    # inserting under a new version key must not clobber the old entries
    cnf.insert(tasks, 10002)
    assert cnf.get(tasks[0], 10002)['index'] == 1
    assert cnf.get(tasks[1], 10002)['index'] == 3

    # fresh controllers: update pulls whatever is stored for the given version
    tasks = [
        create(reader, UndersamplingTask(), DataFrameTask()),
        create(reader, DataFrameTask())
    ]
    for procs in tasks:
        procs.data.setcache(DataFrameTask, {'index': -1})

    # version 10001 was never re-inserted: caches keep their current value
    cnf.update(tasks, 10001)
    assert tasks[0].data.getcache(DataFrameTask)()['index'] == -1
    assert tasks[1].data.getcache(DataFrameTask)()['index'] == -1

    cnf.update(tasks, 10002)
    assert tasks[0].data.getcache(DataFrameTask)()['index'] == 1
    assert tasks[1].data.getcache(DataFrameTask)()['index'] == 3