Ejemplo n.º 1
0
def test_hp_task_creation():
    """Check how `FitToHairpinTask` populates its `fit` mapping.

    With an existing fasta file, `fit` must hold one entry per hairpin
    ('015' and 'GF1' to 'GF4') plus one entry under the `None` key, all of
    the same type as the fit passed to the constructor.  With a missing
    fasta file only the `None` entry must exist.
    """
    hairpins = {'015', *(f"GF{i}" for i in range(1, 5))}

    task = FitToHairpinTask(
        sequence = utpath("hairpins.fasta"),
        oligos   = "ctgc",
        fit      = ChiSquareFit(),
    )
    # popping the `None` entry leaves only the per-hairpin fits in the mapping
    assert isinstance(task.fit.pop(None), ChiSquareFit)
    assert set(task.fit) == hairpins
    assert all(isinstance(i, ChiSquareFit) for i in task.fit.values())
    assert_equal(task.fit['GF4'].peaks, [153, 205, 407, 496, 715, 845])

    task = FitToHairpinTask(
        sequence = utpath("hairpins.fasta"),
        oligos   = "ctgc",
        fit      = PeakGridFit(),
    )
    assert isinstance(task.fit.pop(None), PeakGridFit)
    assert set(task.fit) == hairpins
    assert all(isinstance(i, PeakGridFit) for i in task.fit.values())

    # a fasta file which cannot be found must not produce per-hairpin fits
    for i in PeakGridFit(), ChiSquareFit():
        task = FitToHairpinTask(
            sequence = "doesnotexist.fasta",
            oligos   = "ctgc",
            fit      = i
        )
        assert isinstance(task.fit.pop(None), type(i))
        assert not task.fit
Ejemplo n.º 2
0
def test_rampdatacleaning():
    """Test `RampDataCleaningTask` in a scripting context.

    Two pipelines read the same pk-file: one whose settings let bead 0
    through, for which the fraction of finite frames is checked, and one
    whose settings must raise a `DataCleaningException` with a specific
    message when bead 0 is accessed.
    """
    filepath = "Ramp-fov5-PBS_AfterDNApol1.pk"
    beadid = 0  # the only bead in the pk-file

    proc_pass = create(
        TrackReaderTask(path=utpath(filepath)),
        RampDataCleaningTask(minpopulation=20,
                             extentoutlierspercentile=90,
                             minextentoutliers=90))
    proc_fail = create(
        TrackReaderTask(path=utpath(filepath)),
        RampDataCleaningTask(minpopulation=100,
                             extentoutlierspercentile=95,
                             minextentoutliers=90,
                             maxhfsigma=.0002))

    data_pass = next(iter(proc_pass.run()))
    num_valid_point = np.sum(np.isfinite(data_pass[beadid]))

    assert num_valid_point / data_pass.nframes == pytest.approx(
        10 / 22, rel=1e-4)  # 10/22 cyc pass

    data_fail = next(iter(proc_fail.run()))
    with pytest.raises(
            DataCleaningException,
            match=re.escape('22 cycles: σ[HF] > 0.0002\n' +
                            '12 cycles: Δz-outlier < 90% of Δz-consensus')):
        # the exception is only raised once the bead is requested
        data_fail[beadid]
Ejemplo n.º 3
0
def test_taskdialog_fit_view(pkviewserver):
    """Test editing the hairpin-fit settings through the modal dialog.

    A hairpin fit is first added with the `hp6.fasta` sequences, then the
    sequence path and the oligos are changed through the dialog.  The
    `FitToHairpinTask` must reflect the new values and the dialog must show
    them when re-opened.
    """
    # XPaths of the two dialog inputs: plain strings, nothing to interpolate
    seqinput   = "//input[@name='items[0].fit.task.sequences']"
    oligoinput = "//input[@name='items[0].fit.task.oligos']"
    hairpins   = {'015', *(f'GF{i}' for i in range(1, 5))}

    server = pkviewserver()[0]
    server.ctrl.theme.model("peakcalling.view.stats").linear = False
    server.addhp(sequences=utpath("hp6.fasta"), oligos=["aacc"], rendered=True)
    assert set(server.task(FitToHairpinTask).sequences) == {
        'full', 'oligo', 'target'
    }
    assert server.task(FitToHairpinTask).oligos == ['aacc']
    assert set(
        server.task(FitToHairpinTask).fit) == {'full', 'oligo', 'target'}

    modal = server.selenium.modal("//span[@class='icon-dpx-cog']", True)
    with modal:
        modal.tab("Hairpins")
        modal[seqinput] = str(utpath("hairpins.fasta"))
        modal.tab("Oligos")
        modal[oligoinput] = "kmer"
    server.wait()
    assert set(server.task(FitToHairpinTask).sequences) == hairpins
    # "kmer" was typed in, but the task is expected to hold an explicit oligo
    assert server.task(FitToHairpinTask).oligos == ['ctgt']
    assert set(server.task(FitToHairpinTask).fit) == hairpins

    with modal:
        assert (modal[seqinput].get_attribute('value')
                == str(utpath("hairpins.fasta")))
        assert modal[oligoinput].get_attribute('value') == "ctgt"
Ejemplo n.º 4
0
def test_subtraction_dataframe():
    """Test the 'status' and 'fixed' measures of `DataFrameTask`.

    The `fixedbeads.pk` track is converted to a dataframe with and without
    a `DataCleaningTask` in the pipeline.  Bead 4 must always be the only
    one flagged as fixed, and the 'status' column must only exist when that
    measure is requested.
    """
    def _frame(measures, *middle):
        "run the pipeline, inserting `middle` tasks before the dataframe"
        proc = create(TrackReaderTask(path=utpath("fixedbeads.pk")),
                      *middle,
                      DataFrameTask(merge=True, measures=measures))
        return next(iter(proc.run()))

    def _fixed(frame):
        "rows of the fixed beads, with the index moved to columns"
        return frame[frame.fixed].reset_index()

    # 'status' measure, without cleaning: the fixed bead's status is empty
    frame = _frame({'status': True})
    assert list(_fixed(frame).bead) == [4]
    assert 'status' in frame.columns
    assert isinstance(frame.status.values[0], pd.DataFrame)
    assert _fixed(frame).status[0].shape[0] == 0
    assert 'modification' in frame.columns
    assert hasattr(frame, 'tasklist')

    # 'status' measure, with cleaning: the fixed bead's status has rows
    frame = _frame({'status': True}, DataCleaningTask())
    assert list(_fixed(frame).bead) == [4]
    assert 'status' in frame.columns
    assert isinstance(frame.status.values[0], pd.DataFrame)
    assert _fixed(frame).status[0].shape[0] != 0

    # 'fixed' measure only: no 'status' column, with or without cleaning
    for middle in ((), (DataCleaningTask(),)):
        frame = _frame({'fixed': True}, *middle)
        assert list(_fixed(frame).bead) == [4]
        assert 'status' not in frame.columns
Ejemplo n.º 5
0
def test_loadgrdir():
    """Test loading a track together with its gr-files.

    The first pass gives the gr directory itself, the second pass lists
    every '.gr' file explicitly and rotates the paths by five items.  Both
    passes must produce the same beads and the same valid cycles for
    bead 28.
    """
    expectedbeads = {
        0, 10, 12, 13, 14, 16, 17, 18, 1, 21, 22, 23, 24, 25, 26, 27, 28,
        29, 2, 34, 35, 37, 3, 4, 6, 7
    }
    rawcycles = {
        17, 23, 41, 14, 31, 45, 18, 37, 44, 7, 32, 6, 48, 22, 24, 47, 28,
        19, 30, 25, 43, 42, 8, 26, 16, 12, 9, 33, 35, 27, 3, 10, 21, 15,
        34, 29, 13, 5, 4, 20, 46, 11
    }
    expectedcycles = {i - 3 for i in rawcycles}

    paths = utpath("big_legacy"), utpath("big_grlegacy")
    for explicit in (False, True):
        if explicit:
            grfiles = tuple(
                str(i) for i in Path(cast(str, paths[1])).iterdir()
                if i.suffix == '.gr')
            paths = paths[:1] + grfiles  # type: ignore
            paths = paths[5:] + paths[:5]  # type: ignore

        track = data.Track(path=paths)
        assert set(track.beads.keys()) == expectedbeads

        # cycles of bead 28 holding at least one non-NaN value
        good = set()
        for key, values in track.cycles[28, ...]:
            if not np.all(np.isnan(values)):
                good.add(key[1])
        assert good == expectedcycles
        assert len(good) < track.ncycles

        # phase 0 must contain no finite value at all for bead 28
        for _, values in track.cycles.withphases(0)[28, ...]:
            assert np.isfinite(values).sum() == 0
def test_lru():
    """Test the mapping interface of `_RootCache`.

    Checks `setdefault`, item access, assignment, membership and deletion,
    that equivalent task lists map to the same entry, and that inserting a
    third key into a cache created with size 2 removes the first key.
    """
    withref = [TrackReaderTask(utpath("big_legacy")), _Ref()]
    lru = _RootCache(2)
    first = {}
    assert first is lru.setdefault(withref, first)
    assert first is lru.setdefault(withref, None)
    assert first is lru[withref]
    assert withref in lru

    # assigning replaces the stored value
    second = {}
    lru[withref] = second
    assert withref in lru
    assert first is not lru[withref]
    assert second is lru[withref]

    # a freshly built but equivalent task list reaches the same entry
    withref = [TrackReaderTask(utpath("big_legacy")), _Ref()]
    assert second is lru.setdefault(withref, None)

    readeronly = [TrackReaderTask(utpath("big_legacy"))]
    assert readeronly not in lru
    created = lru.setdefault(readeronly, None)
    assert created is lru.setdefault(readeronly, None)

    # a third key pushes out `withref` but keeps `readeronly`
    withhairpin = [TrackReaderTask(utpath("big_legacy")), _Ref(), _Hairpin()]
    lru.setdefault(withhairpin, None)
    assert withref not in lru
    assert readeronly in lru

    del lru[readeronly]
    assert readeronly not in lru
Ejemplo n.º 7
0
def test_cycles_lazy():
    """Test `data.Cycles` when every attribute is provided as a callable.

    The values iterated over must equal a fixed slice of the data returned
    by `readtrack` for the same file.
    """
    truth = readtrack(utpath("big_legacy"))[0]
    lazy = data.Cycles(
        track=lambda _: data.Track(path=utpath("big_legacy")),
        first=lambda _: 2,
        last=lambda _: 3,
        selected=lambda _: [(0, 1)])

    # expected frame range, kept as the original differences
    start, stop = 1206 - 678, 1275 - 678
    for _, vals in lazy:
        assert np.array_equal(vals, truth[start:stop])
Ejemplo n.º 8
0
def test_confusion(scriptingcleaner):
    """Test the confusion matrix.

    Loads pickled peaks, drops the rows of the 'ref' track, then computes
    the detection and confusion tables.  Both tables are returned.
    """
    from scripting             import Track
    from scripting.confusion   import ConfusionMatrix, LNAHairpin
    from tests.testingcore     import path as utpath
    # The file is a local test fixture, hence unpickling it is acceptable.
    # The context manager makes sure the handle is closed after loading.
    with open(cast(str, utpath("hp6jan2018.peaks")), "rb") as stream:
        peaks = pickle.load(stream)
    peaks = peaks[peaks.track != 'ref']
    cnf   = ConfusionMatrix(oligos  = peaks.track.unique(),
                            seq     = LNAHairpin(path = utpath("hp6.fasta")))
    det   = cnf.detection(peaks)
    conf  = cnf.confusion(det)
    return det, conf
Ejemplo n.º 9
0
def test_allleaves():
    """Test the pairing of track files and gr-files in the absence of cgr.

    `LegacyGRFilesIO.scan` is given glob patterns for both file types and
    must report exactly one good pairing, for the `big_legacy` track.
    """
    trkdir = Path(cast(str, utpath("big_legacy"))).parent
    trkpath = str(trkdir / '*.trk')
    print(trkpath)

    grdir = Path(cast(str, utpath("big_grlegacy")))
    grpath = str(grdir / '*.gr')
    print(grpath)

    good, _second, _third = LegacyGRFilesIO.scan(
        trkpath, grpath, cgrdir="", allleaves=True)
    assert len(good) == 1

    # the first item of each pairing is the track path
    trks = [str(pairing[0]) for pairing in good]
    assert utpath("big_legacy") in trks
Ejemplo n.º 10
0
def test_muwells(scriptingcleaner):
    """Test µwells tracks.

    A track is built from a '.trk' file and a '.txt' file.  It starts
    without tasks, whereas `rescaletobead` must return another track with
    four tasks configured.
    """
    from scripting          import Track
    from tests.testingcore  import path as utpath
    files = (
        utpath("muwells/W6N46_HPB20190107_W2_OR134689_cycle_1.9-2.10_TC10m.trk"),
        utpath("muwells/W6N46_HPB20190107_OR134689_cycle_1.9-2.10_TC10m.txt"),
    )
    track = Track(path = files)
    assert set(track.tasks.tasks.keys()) == set()

    rescaled = track.op.rescaletobead(0)
    assert track is not rescaled

    expected = {'cleaning', 'alignment', 'eventdetection', 'peakselector'}
    assert set(rescaled.tasks.tasks.keys()) == expected
Ejemplo n.º 11
0
def test_rawprecisiontask():
    """Test `RawPrecisionTask` with both raw precision computers.

    The track coming out of the pipeline must use the requested computer,
    and asking for the other computer afterwards must not alter the track
    obtained first.
    """
    pairs = (('range', 'normalized'), ('normalized', 'range'))
    for requested, other in pairs:
        # the track is opened with the requested raw precision computer
        tsk = RawPrecisionTask(computer=requested)
        trk = next(iter(create(utpath("big_all"), tsk).run())).track
        assert trk.rawprecision().keyword() == requested

        # switching computer must leave the first track untouched
        tsk.computer = other
        trk2 = next(iter(create(utpath("big_all"), tsk).run())).track
        assert trk.rawprecision().keyword() == requested
        assert trk2.rawprecision().keyword() == tsk.computer
Ejemplo n.º 12
0
def test_cycleprocess_emptycycles():
    """Smoke-test drift removal on cycles after an extremum alignment.

    The first frame produced by the pipeline is fully converted to a dict,
    which only verifies that the computations do not crash.
    """
    tasks = (
        utpath("big_all"),
        ExtremumAlignmentTask(phase = 'pull', minpopulation = 0.),
        DriftTask(onbeads = False),
    )
    frame = next(iter(create(tasks).run()))
    ret = dict(frame)
    assert ret is not None # check that computations don't crash
Ejemplo n.º 13
0
def test_io_recognition():
    """Test that the right IO class recognizes its paths.

    Each group of `files` lists paths which only the IO class of the same
    name in `types` must accept; the 'none' group lists paths that every
    IO class must reject.  Rejection means that `check` returns `None`.
    """
    def get(name):
        "return the test path as a string"
        return str(utpath(name))

    grfile = get("CTGT_selection") + "/Z(t)bd0track10.gr"
    files = dict(
        pickles=(
            (get("100bp_4mer/ref.pk"), ),
            get("100bp_4mer/ref.pk"),
        ),
        tracks=(
            (get("small_legacy"), ),
            get("small_legacy"),
        ),
        grs=(
            (get("big_legacy"), get("CTGT_selection")),
            (get("big_legacy"), grfile),
        ),
        none=(
            get("CTGT_selection"),
            grfile,
            # A 1-tuple: without the trailing comma this entry was a plain
            # string, duplicating the previous one.
            (grfile, ),
        ),
    )

    types = dict(pickles=PickleIO, tracks=LegacyTrackIO, grs=LegacyGRFilesIO)
    for tpename, tpe in types.items():
        for fname, paths in files.items():
            for path in cast(Iterable, paths):
                assert (tpe.check(path) is None) is (tpename != fname)
Ejemplo n.º 14
0
def test_beaditerkeys():
    """Test whether bead keys are well listed.

    Keys are checked when iterating, selecting, discarding and slicing
    `data.Beads`, both for their number and for their values.
    """
    track = data.Track(path=utpath("small_legacy"))

    def beads():
        "create a fresh view for every check"
        return data.Beads(track=track, data=_MyItem(track.data))

    vals = set(range(92))

    # ways of listing every key
    everything = (
        lambda: beads().keys(),
        lambda: (i for i, _ in beads()),
        lambda: beads().selecting(all).keys(),
        lambda: beads().selecting(None).keys(),
        lambda: beads()[:].keys(),
    )
    # ways of listing a subset, along with the expected keys
    sliced = (
        (lambda: beads()[:2].keys(), {0, 1}),
        (lambda: beads()[:2][1:5].keys(), {1}),  # pylint: disable=unsubscriptable-object
    )

    for listing in everything:
        assert len(tuple(listing())) == len(vals)
    for listing, expected in sliced:
        assert len(tuple(listing())) == len(expected)

    for listing in everything:
        assert set(listing()) == vals
    for listing, expected in sliced:
        assert set(listing()) == expected
    assert isinstance(beads()[0], np.ndarray)

    # selections keep the requested order, repetitions included
    sel = track.beads
    repeated = [2, 3, 2]
    assert tuple(beads().selecting(repeated).keys()) == (2, 3, 2)
    assert tuple(i for i, _ in sel.selecting(repeated)) == (2, 3, 2)
    assert tuple(sel.selecting(2, clear=True).keys()) == (2, )

    remaining = beads().selecting(range(50)).discarding(range(1, 48))
    assert tuple(remaining.keys()) == (0, 48, 49)

    chained = beads().selecting(2).selecting([2, 3])
    assert tuple(chained.keys()) == (2, 2, 3)
Ejemplo n.º 15
0
def test_toref_frompeaks():
    """Test reference comparison starting from peaks.

    For both `HistogramFit` and `ReferencePeaksFit`, the peaks of the first
    bead are fitted against a stretched and shifted copy of themselves.
    The optimization must recover the stretch and the bias.
    """
    def _firstbeadpeaks():
        "peaks of the first bead out of a fresh pipeline"
        pair = create(utpath("big_selected"), EventDetectionTask(), PeakSelectorTask())
        pks  = {i: tuple(j) for i, j in next(iter(pair.run()))}
        return next(iter(pks.values()))

    res     = HistogramFit().frompeaks(_firstbeadpeaks())
    shifted = HistogramData(res.histogram, res.minvalue+.01, res.binwidth/1.01)
    ret     = HistogramFit().optimize(res, shifted)
    assert_allclose(ret[1:], [1.01, .01], rtol = 5e-4, atol = 5e-4)

    res = ReferencePeaksFit().frompeaks(_firstbeadpeaks())
    ret = ReferencePeaksFit().optimize(res, res/1.01+0.1)
    assert_allclose(ret[1:], [1.01, .1], rtol = 5e-4, atol = 1e-2)
Ejemplo n.º 16
0
def test_trackconfig(scriptingcleaner):
    """Test the views provided by the scripting-enhanced track.

    Checks the path, the bead keys (0 to 38) and the type of the
    `cleancycles`, `measures`, `events` and `peaks` attributes.
    """
    from scripting             import Track, Tasks, localcontext
    from data                  import Cycles
    from eventdetection.data   import Events
    from peakfinding.processor import PeaksDict
    from tests.testingcore     import path as utpath

    track = Track(path = utpath("big_legacy"))
    assert track.path == utpath("big_legacy")
    assert set(track.data.keys()) == set(range(39))

    # the checks are kept in the original order
    expected = (
        ('cleancycles', Cycles),
        ('measures',    Cycles),
        ('events',      Events),
        ('peaks',       PeaksDict),
    )
    for name, tpe in expected:
        assert isinstance(getattr(track, name), tpe)
Ejemplo n.º 17
0
def test_message_creation():
    """Test that requesting bead 5 after cleaning raises.

    The pipeline itself runs without error: the `DataCleaningException`
    is only expected once the bead is accessed.
    """
    pipeline = (TrackReaderTask(path=utpath("big_legacy")),
                DataCleaningTask())
    beads = next(iter(create(*pipeline).run()))
    with pytest.raises(DataCleaningException):
        beads[5]
Ejemplo n.º 18
0
def test_beadsplot_info_simple(diskcaching):
    """Test the x-axis factors computed by the beads plots.

    The factors are checked when there is nothing to plot, after running
    the jobs, after adding a hairpin fit, after restricting the displayed
    hairpins and after clearing the sorting.
    """
    # pylint: disable=protected-access
    beads, mdl = _Fig.create()

    def _factors(cls, **kwa):
        "update the theme, reset the plot and return its x-axis factors"
        mdl.theme.__dict__.update(**kwa)
        return dict(cls(beads, mdl.tasks.processors)._reset())['x_range']['factors']

    def _launch():
        "launch the jobs for all current processors"
        mdl.tasks.jobs.launch(list(mdl.tasks.processors.values()))

    # testing for when there is nothing to plot
    for cls in _PeaksPlot, _HairpinPlot:
        assert (
            dict(cls(beads, mdl.tasks.processors)._reset())['x_range']['factors']
            == [('track1', '0')]
        )

    _launch()

    beadnames = [
        '0', '1', '2', '3', '4', '7', '8', '12', '13', '14', '17', '18', '23',
        '24', '25', '27', '33', '34', '35', '37'
    ]
    assert _factors(_PeaksPlot) == [('', '', name) for name in beadnames]

    # add a hairpin fit to the first task list, then recompute
    first = next(iter(mdl.tasks.tasks.tasks.tasks.values()))
    first.add(
        FitToHairpinTask(
            sequence = utpath("hairpins.fasta"),
            oligos   = "4mer",
        ),
        mdl.tasks.tasks.state.processors[FitToHairpinTask]
    )
    _launch()

    gf1 = [('GF1', '', name) for name in ('14', '33', '1', '7', '25', '35', '12')]
    gf3 = [('GF3', '', name) for name in ('27', '13', '3', '17', '37', '23', '18')]
    gf4 = [('GF4', '', name) for name in ('34', '0', '4', '24')]
    gf2 = [('GF2', '', '2')]
    assert _factors(_HairpinPlot) == gf1 + gf3 + gf4 + gf2

    # with this setting only the GF1 beads are expected to remain
    mdl.display.hairpins = {'015', 'GF2', 'GF3', 'GF4'}
    assert _factors(_HairpinPlot) == gf1

    # without sorting, the beads are expected in increasing order
    mdl.theme.sorting = {}
    assert _factors(_HairpinPlot) == [
        ('GF1', '', name) for name in ('1', '7', '12', '14', '25', '33', '35')
    ]
Ejemplo n.º 19
0
def test_concatenate():
    """Test whether two tracks stack properly.

    The second track lacks bead 0.  The concatenation must hold the beads
    of both tracks, consecutive frame numbers, NaN values for bead 0 over
    the second part, and phases shifted so that they follow the first
    track.
    """
    first = Track(path=utpath("small_legacy"))
    second = dropbeads(Track(path=utpath("small_legacy")), 0)
    size1 = next(iter(first.data.values())).size
    size2 = next(iter(second.data.values())).size
    merged = concatenatetracks(first, second)

    allbeads = set(first.data.keys()) | set(second.data.keys())
    assert set(merged.data.keys()) == allbeads

    # frame numbers must increase by exactly one
    steps = merged.secondaries.frames[1:] - merged.secondaries.frames[:-1]
    assert all(steps == 1)

    # bead 0 only exists in the first track
    assert all(np.isnan(merged.data[0][-size2:]))
    assert all(~np.isnan(merged.data[0][:size1]))

    nphases = len(first.phases)
    assert_allclose(merged.phases[:nphases], first.phases)
    assert_allclose(
        merged.phases[nphases:], second.phases +
        first.secondaries.frames[-1] - second.secondaries.frames[0] + 1)
Ejemplo n.º 20
0
def test_findgrdir():
    """Test loading a track from a '*.trk' pattern and a gr directory.

    The same load is performed twice in a row and must give the same
    beads both times.
    """
    expected = {
        0, 10, 12, 13, 14, 16, 17, 18, 1, 21, 22, 23, 24, 25, 26, 27, 28, 29,
        2, 34, 35, 37, 3, 4, 6, 7
    }
    for _ in range(2):
        trkdir = Path(cast(str, utpath("big_legacy"))).parent
        paths = str(trkdir / '*.trk'), utpath("big_grlegacy")
        track = data.Track(path=paths)
        assert set(track.beads.keys()) == expected
Ejemplo n.º 21
0
    def newtasks(mdl, beads=None, withhp=False):
        """Add a list of tasks to the model.

        The list reads the `big_legacy` track, then cleans, clips, aligns,
        detects events and selects peaks.  When `beads` is provided and not
        empty, a bead selection is inserted right after the reader.  When
        `withhp` is true, a hairpin fit is appended.
        """
        pipeline = [TrackReaderTask(path=utpath("big_legacy"))]
        if beads:
            pipeline.append(DataSelectionTask(selected=list(beads)))
        pipeline.extend((
            DataCleaningTask(),
            ClippingTask(),
            ExtremumAlignmentTask(),
            EventDetectionTask(),
            PeakSelectorTask(),
        ))
        if withhp:
            hpfit = FitToHairpinTask(sequences=utpath("hairpins.fasta"),
                                     oligos="kmer")
            pipeline.append(hpfit)

        mdl.tasks.tasks.tasks.add(create(pipeline))
Ejemplo n.º 22
0
def test_dataframe():
    """Test the conversion of a track to a dataframe.

    Each bead must be stored in a 'b<bead>' column, the 'zmag', 'phase'
    and 'cid' columns must match the track secondaries, and the 'tsample',
    'tsink' and 'tservo' columns must exist.
    """
    trk = Track(path=utpath("small_legacy"))
    frame = dataframe(trk)

    for bead, values in trk.beads:
        assert_equal(frame[f'b{bead}'], values)

    for name in ("zmag", "phase", "cid"):
        assert_equal(frame[name], getattr(trk.secondaries, name))

    assert {"tsample", "tsink", "tservo"}.issubset(frame.columns)
Ejemplo n.º 23
0
def test_cleaning_dataframe():
    """Test the dataframe created right after a `DataCleaningTask`.

    Bead 5 must be flagged as bad and bead 0 as good.  The frame must also
    provide a 'modification' column and a `tasklist` attribute.
    """
    pipeline = (TrackReaderTask(path=utpath("big_legacy")),
                DataCleaningTask(),
                DataFrameTask(merge=True))
    frame = next(iter(create(*pipeline).run()))

    for bead, isbad in ((5, True), (0, False)):
        assert list(frame.reset_index(0).loc[bead].bad.unique()) == [isbad]
    assert 'modification' in frame.columns
    assert hasattr(frame, 'tasklist')
def test_adapt_procs_ref():
    """Test how `TasksModel.processors` adapts task lists to a reference.

    Three task lists are registered, the first already holding a `_Ref`
    task.  Without a reference, every adapted list must hold three tasks
    and end with a `DataFrameTask`.  Once the second track is set as the
    reference, every other list must hold a fourth task: a `_Ref` at
    index 2 whose default data is the data of the reference's processors.
    """
    procs = [ProcessorController() for _ in range(3)]
    for ind, ctrl in enumerate(procs):
        ctrl.add(TrackReaderTask(utpath("big_legacy")), TrackReaderProcessor)
        if ind == 0:
            ctrl.add(_Ref(), _DummyProc)

    mdl = TasksModel()
    mdl.config.defaulttaskindex = lambda *_: appendtask
    for cnf in (mdl.config.sdi, mdl.config.picotwist):
        cnf['fittoreference'] = _Ref()
        cnf['fittohairpin']   = _Hairpin()
        cnf['dataframe']      = DataFrameTask()

    for tpe in (_Ref, _Hairpin, DataFrameTask):
        mdl.state.processors[tpe] = _DummyProc
    mdl.state.processors[TrackReaderTask] = TrackReaderProcessor

    for ctrl in procs:
        mdl.tasks.add(ctrl)
    mdl.dataframes.peaks.measures = {'events': True}

    # without a reference
    adapted = mdl.processors
    assert len(adapted) == len(procs)
    for ctrl in procs:
        root = ctrl.model[0]
        assert len(adapted[root].model) == 3
        assert ctrl.model is not adapted[root].model
        assert adapted[root].model[-1].__class__.__name__ == 'DataFrameTask'
        assert adapted[root].model[-1].measures == {'events': True}

    mdl.state.reference = procs[1].model[0]

    # with the second track as the reference
    adapted = mdl.processors
    assert len(adapted) == len(procs)
    for ctrl in procs:
        root = ctrl.model[0]
        notref = root is not mdl.state.reference
        assert len(adapted[root].model) == 3 + notref
        assert ctrl.model is not adapted[root].model
        assert adapted[root].model[-1].__class__.__name__ == 'DataFrameTask'
        assert adapted[root].model[-1].measures == {'events': True}
        if root is not mdl.state.reference:
            assert adapted[root].model[2].__class__.__name__ == '_Ref'
            assert adapted[root].model[2].defaultdata is adapted[mdl.state.reference].data
Ejemplo n.º 25
0
def test_hp_dataframe(record):
    """Test the dataframe created after a fit to hairpins.

    The pipeline runs twice: once requesting the 'missing' and
    'peakhfsigma' peak measures, which adds columns compared against the
    recorded values, and once with the default measures.
    """
    def _frame(**kwa):
        "run the pipeline, forwarding `kwa` to the DataFrameTask"
        pipeline = create(
            TrackReaderTask(path = utpath("big_legacy")),
            EventDetectionTask(),
            PeakSelectorTask(),
            FitToHairpinTask(
                sequence = utpath("hairpins.fasta"),
                oligos   = "4mer",
                fit      = ChiSquareFit()
            ),
            DataFrameTask(merge = True, **kwa),
        )
        return next(iter(pipeline.run()))

    def _common(frame):
        "assertions shared by both runs"
        assert frame.index.names == ['hpin', 'track', 'bead']
        assert 'modification' in frame.columns
        assert hasattr(frame, 'tasklist')

    measures = dict(peaks = dict(missing = True, peakhfsigma = True))
    pair     = _frame(measures = measures)
    assert pair.shape == (102, 29)
    assert 'falseneg' in pair.peaks[1].status.unique()
    _common(pair)
    assert isinstance(pair.peaks.values[0], pd.DataFrame)

    summary = pair.drop(columns = ['peaks', 'cost', 'modification']).iloc[:5]
    assert record["withmissing"].approx(summary, atol = 5e-4)
    assert record["peakswithmissing"].approx(
        pair.peaks[1].iloc[:5], atol = 5e-4
    )

    pair = _frame()
    assert pair.shape == (102, 26)
    _common(pair)
Ejemplo n.º 26
0
def test_task_cache():
    """Tests that actions can be cached.

    A bead-level task whose processor is decorated with `Processor.cache`
    is run several times.  The `calls` list counts how often the cached
    action is invoked, which is compared with the content of the cache.
    """

    # pylint: disable=unused-variable, too-many-locals,invalid-name
    # pylint: disable=too-many-statements,missing-docstring,no-self-use
    class TBeads(tasks.Task):
        level = tasks.Level.bead

        def __init__(self):
            super().__init__()
            self.dummy = None

    # one item is appended every time the cached action is called
    calls = []

    class TBProc(Processor):
        tasktype = TBeads

        @Processor.cache
        def run(self, _):
            # the action returns its input unchanged and records the call
            def _outp(_, x):
                calls.append(1)
                return x

            return _outp

    ctrl = TaskController()
    read = tasks.TrackReaderTask(path=utpath("small_legacy"))
    tb = TBeads()
    ctrl.opentrack(read, (read, tb))

    # nothing has run yet: no cache exists
    assert ctrl.cache(read, tb)() is None
    # the result is discarded without being iterated: the cache is then
    # expected to exist while still being empty
    ctrl.run(read, tb)
    dt = ctrl.cache(read, tb)()
    assert dt is not None
    assert len(dt) == 0

    # iterating over the frames, but not over their beads, is expected to
    # create a single entry which is itself empty
    tuple(ctrl.run(read, tb))
    assert len(dt) == 1
    assert len(next(iter(dt.values()))) == 0

    # iterating over the beads calls the action: the entry must then hold
    # as many items as there were calls
    tuple(bead for frame in ctrl.run(read, tb) for bead in frame)
    sz = len(calls)
    assert len(next(iter(dt.values()))) == sz

    # running again without iterating over beads must not add any call
    tuple(ctrl.run(read, tb))
    assert len(calls) == sz

    # updating the task is expected to discard its cache
    ctrl.updatetask(read, tb, dummy=1)
    assert ctrl.cache(read, tb)() is None
    v1 = next(iter(next(ctrl.run(read, tb))))[1]
    # NOTE(review): `read` is passed twice and the result is subscripted,
    # unlike the other `ctrl.run` calls; presumably this provides the
    # reader's own output -- confirm against TaskController.run
    v2 = next(iter(ctrl.run(read, read)[0]))[1]
    dt = ctrl.cache(read, tb)()
    assert len(dt) == 1
    assert len(next(iter(dt.values()))) == 1
    # both values must be equal without being the same object
    assert numpy.array_equal(v1, v2)
    assert v1 is not v2
Ejemplo n.º 27
0
def test_tracksdict_ramps_dataframe(scriptingcleaner):
    """Test `TracksDict.dataframe` when requesting ramps.

    The frame must provide a 'modification' column and a `tasklist`
    attribute whose first list ends with a task whose class name starts
    with "Ramp".
    """
    from scripting          import TracksDict
    from tests.testingcore  import path as utpath
    pattern = utpath("100bp_4mer")+"/../ramp*.trk"
    dframe  = TracksDict(pattern).dataframe(ramps = True)

    assert 'modification' in dframe.columns
    assert hasattr(dframe, 'tasklist')

    lasttask = dframe.tasklist[0][-1]
    assert lasttask.__class__.__name__.startswith("Ramp")
Ejemplo n.º 28
0
def test_tracksdict_hpfit_dataframe(scriptingcleaner):
    """Test that a fit to hairpins can be launched on a `TracksDict`.

    The resulting dataframe must carry a single task list, keep the
    requested "4mer" oligos in its second-to-last task, and report "ctgt"
    as the only oligo found.
    """
    from scripting          import TracksDict
    from tests.testingcore  import path as utpath
    import pandas as pd
    tracks = TracksDict()
    tracks['xxx'] = utpath("big_legacy")
    frame = tracks.peaks.dataframe(
        sequence = utpath("hairpins.fasta"),
        oligos   = '4mer'
    )
    assert isinstance(frame, pd.DataFrame)

    # a single list of tasks: the one used for the only track
    assert len(frame.tasklist) == 1
    usedtasks = frame.tasklist[0]
    assert isinstance(usedtasks, list)
    assert usedtasks[-2].oligos == "4mer"

    assert list(frame.oligo.unique()) == ["ctgt"]
    assert frame.shape == (80, 27)
    assert frame.index.names == ['hpin', 'track', 'bead']

    for column in ('trackcount', 'modification'):
        assert column in frame.columns
    assert hasattr(frame, 'tasklist')
Ejemplo n.º 29
0
def test_track(scriptingcleaner):
    """Test the scripting-enhanced track.

    Covers the track views and their task lists, the default task lists
    depending on the `cleaned` flag and on the subtraction task, track
    slicing through `op`, and the instrument-dependent `rawfactor` of the
    peak selector.
    """
    from scripting             import Track, Tasks, localcontext, Task
    from data                  import Cycles, Beads
    from eventdetection.data   import Events
    from peakfinding.processor import PeaksDict
    from taskmodel             import InstrumentType
    from tests.testingcore     import path as utpath

    def _defaults(*args):
        "the default task list, as `Tasks` items"
        return [Tasks(i) for i in Tasks.defaulttasklist(*args)]

    track = Track(path = utpath("big_legacy"))
    assert track.path == utpath("big_legacy")

    assert set(track.data.keys()) == set(range(39))
    views = (
        ('cleanbeads',  Beads),
        ('cleancycles', Cycles),
        ('events',      Events),
        ('peaks',       PeaksDict),
    )
    for name, tpe in views:
        view = getattr(track, name)
        assert isinstance(view, tpe)
        assert all(isinstance(k, Task) for k in view.tasklist)
    assert track.cleaned is False

    # default task lists for a track which is not cleaned
    uptoclipping = [
        Tasks.undersampling, Tasks.cleaning, Tasks.alignment, Tasks.clipping
    ]
    assert _defaults(None, Tasks.clipping, False) == uptoclipping
    assert _defaults(track, Tasks.clipping) == uptoclipping
    assert _defaults(track, ...) == uptoclipping + [
        Tasks.eventdetection, Tasks.peakselector, Tasks.fittohairpin
    ]

    # default task lists for a cleaned track
    assert _defaults(None, Tasks.alignment, True) == [Tasks.alignment]
    track.cleaned = True
    assert _defaults(track, Tasks.alignment) == [Tasks.alignment]
    with localcontext(scripting = {'alignalways': False}):
        assert _defaults(track, Tasks.alignment) == []

    # the subtraction task accepts a single bead or several of them
    assert track.tasks.subtraction is None
    track.tasks.subtraction = 1 # type: ignore
    assert set(track.tasks.subtraction.beads) == {1}
    track.tasks.subtraction = 1,2
    assert set(track.tasks.subtraction.beads) == {1,2}
    track.cleaned = False
    assert _defaults(track, Tasks.alignment) == [
        Tasks.undersampling, Tasks.subtraction, Tasks.cleaning, Tasks.alignment
    ]

    assert track.op[:,:5].ncycles == 5
    assert set(track.op[[1,2]].beads.keys()) == {1,2}

    # the raw factor depends on the instrument
    picofile = utpath("big_legacy")
    assert InstrumentType(picofile) == InstrumentType.picotwist
    assert InstrumentType(utpath("sdi_track.pk")) == InstrumentType.sdi
    assert Tasks.peakselector(instrument = utpath("big_legacy")).rawfactor == 2.
    assert track.peaks.tasklist[-1].rawfactor == 2.
    assert Tasks.peakselector(instrument = utpath("sdi_track.pk")).rawfactor == 1.
    assert Track(path = utpath("sdi_track.pk")).peaks.tasklist[-1].rawfactor == 1.
Ejemplo n.º 30
0
def test_tracksdict_creation():
    """Test finding tracks with `TracksDict` and its `match` argument.

    The directory of the `big_legacy` track is scanned with different
    `match` values, then `TracksDict.leastcommonkeys` is called with a
    `TracksDict`, with unpacked tracks and with an iterable of tracks.
    """
    def _stems(tracks):
        "file stems of the tracks in the mapping"
        return {Path(i.path).stem for i in tracks.values()}

    ctgt = 'test035_5HPs_mix_CTGT--4xAc_5nM_25C_10sec'
    gatg = 'test035_5HPs_mix_GATG_5nM_25C_8sec_with_ramp'

    path = str(Path(utpath("big_legacy")).parent)
    assert sum(1 for i in TracksDict(path).values()) > 2

    assert _stems(TracksDict(path, match="kmer")) == {ctgt, gatg}
    assert len(_stems(TracksDict(path, match="3mer"))) == 0
    assert _stems(TracksDict(path, match="4mer")) == {ctgt, gatg}
    assert _stems(TracksDict(path, match="ctgt")) == {ctgt}
    assert _stems(TracksDict(path, match="gatg")) == {gatg}

    # `leastcommonkeys` must keep both tracks whatever the input form
    cur = TracksDict(path, match="4mer")
    assert _stems(TracksDict.leastcommonkeys(cur)) == {ctgt, gatg}
    assert _stems(TracksDict.leastcommonkeys(*cur.values())) == {ctgt, gatg}
    assert _stems(TracksDict.leastcommonkeys(cur.values())) == {ctgt, gatg}