def test_rampdatacleaning():
    "test RampDataCleaningTask in scripting context"
    filepath = "Ramp-fov5-PBS_AfterDNApol1.pk"
    beadid   = 0  # the only bead in the pk-file

    # lenient thresholds: the bead should survive cleaning
    proc_pass = create(
        TrackReaderTask(path=utpath(filepath)),
        RampDataCleaningTask(minpopulation=20,
                             extentoutlierspercentile=90,
                             minextentoutliers=90))
    # strict thresholds: the bead should be rejected
    proc_fail = create(
        TrackReaderTask(path=utpath(filepath)),
        RampDataCleaningTask(minpopulation=100,
                             extentoutlierspercentile=95,
                             minextentoutliers=90,
                             maxhfsigma=.0002))

    data_pass = next(iter(proc_pass.run()))
    num_valid_point = np.sum(np.isfinite(data_pass[beadid]))
    assert num_valid_point / data_pass.nframes == pytest.approx(
        10 / 22, rel=1e-4)  # 10/22 cyc pass

    data_fail = next(iter(proc_fail.run()))
    with pytest.raises(
            DataCleaningException,
            match=re.escape('22 cycles: σ[HF] > 0.0002\n' +
                            '12 cycles: Δz-outlier < 90% of Δz-consensus')):
        data_fail[beadid]  # accessing the bead triggers the exception
def test_message_creation():
    "test message creation"
    ctrl = create(TrackReaderTask(path=utpath("big_legacy")), DataCleaningTask())
    beads = next(iter(ctrl.run()))
    # bead 5 fails the cleaning rules: accessing it must raise
    with pytest.raises(DataCleaningException):
        beads[5]
def test_cleaning_dataframe():
    "test cleaning dataframe creation"
    ctrl = create(TrackReaderTask(path=utpath("big_legacy")),
                  DataCleaningTask(),
                  DataFrameTask(merge=True))
    frame = next(iter(ctrl.run()))
    # bead 5 is flagged bad throughout, bead 0 is good throughout
    assert list(frame.reset_index(0).loc[5].bad.unique()) == [True]
    assert list(frame.reset_index(0).loc[0].bad.unique()) == [False]
    assert 'modification' in frame.columns
    assert hasattr(frame, 'tasklist')
def test_adapt_procs_ref():
    "test processors adaptor remove ref"
    # three controllers on the same track; only the first carries a _Ref task
    procs = [ProcessorController() for _ in range(3)]
    procs[0].add(TrackReaderTask(utpath("big_legacy")), TrackReaderProcessor)
    procs[0].add(_Ref(), _DummyProc)
    procs[1].add(TrackReaderTask(utpath("big_legacy")), TrackReaderProcessor)
    procs[2].add(TrackReaderTask(utpath("big_legacy")), TrackReaderProcessor)

    mdl = TasksModel()
    mdl.config.defaulttaskindex = lambda *_: appendtask
    for i in (mdl.config.sdi, mdl.config.picotwist):
        i['fittoreference'] = _Ref()
        i['fittohairpin']   = _Hairpin()
        i['dataframe']      = DataFrameTask()
    for i in (_Ref, _Hairpin, DataFrameTask):
        mdl.state.processors[i] = _DummyProc
    mdl.state.processors[TrackReaderTask] = TrackReaderProcessor
    for i in procs:
        mdl.tasks.add(i)
    mdl.dataframes.peaks.measures = {'events': True}

    # without a reference set: every model gets reader + ref + dataframe
    lst = mdl.processors
    assert len(lst) == len(procs)
    for i in procs:
        assert len(lst[i.model[0]].model) == 3
        assert i.model is not lst[i.model[0]].model
        assert lst[i.model[0]].model[-1].__class__.__name__ == 'DataFrameTask'
        assert lst[i.model[0]].model[-1].measures == {'events': True}

    # with a reference set: the reference itself loses its fit-to-ref task
    mdl.state.reference = procs[1].model[0]
    lst = mdl.processors
    assert len(lst) == len(procs)
    for i in procs:
        assert len(lst[i.model[0]].model) == 3 + (i.model[0] is not mdl.state.reference)
        assert i.model is not lst[i.model[0]].model
        assert lst[i.model[0]].model[-1].__class__.__name__ == 'DataFrameTask'
        assert lst[i.model[0]].model[-1].measures == {'events': True}
        if i.model[0] is not mdl.state.reference:
            assert lst[i.model[0]].model[2].__class__.__name__ == '_Ref'
            assert lst[i.model[0]].model[2].defaultdata is lst[mdl.state.reference].data
def test_hp_dataframe(record):
    "test fit to hp dataframe"
    def _run(**dframeargs):
        "run the fit-to-hairpin pipeline with the given DataFrameTask options"
        return next(iter(create(
            TrackReaderTask(path=utpath("big_legacy")),
            EventDetectionTask(),
            PeakSelectorTask(),
            FitToHairpinTask(
                sequence=utpath("hairpins.fasta"),
                oligos="4mer",
                fit=ChiSquareFit()
            ),
            DataFrameTask(merge=True, **dframeargs),
        ).run()))

    # with 'missing' & 'peakhfsigma' measures requested
    pair = _run(measures=dict(peaks=dict(missing=True, peakhfsigma=True)))
    assert pair.shape == (102, 29)
    assert 'falseneg' in pair.peaks[1].status.unique()
    assert pair.index.names == ['hpin', 'track', 'bead']
    assert isinstance(pair.peaks.values[0], pd.DataFrame)
    assert 'modification' in pair.columns
    assert hasattr(pair, 'tasklist')
    assert record["withmissing"].approx(
        pair.drop(columns=['peaks', 'cost', 'modification']).iloc[:5],
        atol=5e-4
    )
    assert record["peakswithmissing"].approx(pair.peaks[1].iloc[:5], atol=5e-4)

    # with the default measures
    pair = _run()
    assert pair.shape == (102, 26)
    assert pair.index.names == ['hpin', 'track', 'bead']
    assert 'modification' in pair.columns
    assert hasattr(pair, 'tasklist')
def test_fixedbeadsorting():
    "test fixed bead detection"
    import cleaning.beadsubtraction as B
    # patch the module so detection uses the task class under test
    B.BeadSubtractionTask = BeadSubtractionTask
    ctrl = create(TrackReaderTask(path=utpath("fixedbeads.pk")))
    beads = next(iter(ctrl.run()))
    found = FixedBeadDetection()(beads)
    assert len(found) == 1
    assert found[0][-1] == 4  # bead 4 is the fixed one
    frames = FixedBeadDetection().dataframe(beads)
    assert frames.shape == (4, 16)
    assert set(frames[frames.good].bead.values) == {4}
def test_cycletable(bead: int, taskcount: int):
    "test rescaling"
    from eventdetection.processor import ExtremumAlignmentTask
    from cleaning.view._plot import GuiDataCleaningProcessor
    tasks: Tuple[Task, ...] = (
        TrackReaderTask(path=utpath("big_legacy")),
        DataCleaningTask(),
    )
    # taskcount encodes which optional tasks are appended
    if taskcount in (1, 2):
        tasks += (ExtremumAlignmentTask(),)
    if taskcount in (-2, 2):
        tasks += (ClippingTask(),)

    def find(tpe):
        "return the first task of the given type, or None"
        return next((i for i in tasks if isinstance(i, tpe)), None)

    ctrl = create(tasks)
    _ = next(iter(ctrl.run()))[0]  # get the track
    track = ctrl.data.getcache(tasks[0])()
    GuiDataCleaningProcessor.computeall(
        bead, ctrl,
        cleaning=find(DataCleaningTask),
        alignment=find(ExtremumAlignmentTask),
        clipping=find(ClippingTask))
    info = {
        i.name: i.values
        for i in ctrl.data.getcache(DataCleaningTask)()[bead][0]
    }
    applied_rules = set(info.keys())
    assert applied_rules == {
        'population', 'hfsigma', 'extent', 'pingpong', 'saturation',
        'discarded', 'alignment', 'clipping'
    }
    # 'phasejump'-rule must *only* act on SDI-experiments
    assert 'phasejump' not in applied_rules
    assert all(len(i) == track.ncycles for i in info.values())
    if find(ExtremumAlignmentTask):
        nans = np.isnan(info['alignment'])
        assert nans.sum() == {0: 0, 11: 0, 15: 23}[bead]
        assert_equal(info['discarded'][nans], np.ones(nans.sum(), dtype='f4'))
    else:
        assert_equal(info['alignment'], np.zeros(track.ncycles, dtype='f4'))
    if find(ClippingTask):
        # clipping may only ever increase the discarded ratio
        assert ((info['discarded'][np.isfinite(info['clipping'])] + 1e-5)
                < info['clipping'][np.isfinite(info['clipping'])]).sum() == 0
    else:
        assert_equal(info['clipping'], np.zeros(track.ncycles, dtype='f4'))
def test_adapt_procs_fittohp():
    "test processors adaptor remove ref"
    procs = [ProcessorController() for _ in range(2)]
    procs[0].add(TrackReaderTask(utpath("big_legacy")), TrackReaderProcessor)
    procs[0].add(_Ref(), _DummyProc)
    procs[0].add(_Hairpin(resolve=True), _DummyProc)
    procs[1].add(TrackReaderTask(utpath("big_legacy")), TrackReaderProcessor)

    mdl = TasksModel()
    mdl.config.defaulttaskindex = lambda _1, tsk, *_2: (
        1 if isinstance(tsk, FixedBeadDetectionTask) else appendtask
    )
    mdl.state.sequences = 'a'
    for i in (mdl.config.sdi, mdl.config.picotwist):
        i['fittoreference'] = _Ref()
        i['fittohairpin']   = _Hairpin(resolve=False)
        i['dataframe']      = DataFrameTask()
    for i in (_Ref, _Hairpin, DataFrameTask):
        mdl.state.processors[i] = _DummyProc
    mdl.state.processors[TrackReaderTask] = TrackReaderProcessor
    for i in procs:
        mdl.tasks.add(i)
    mdl.dataframes.fits.measures = {'peaks': {'all': True, 'events': True}}

    # only the controller carrying a resolved _Hairpin yields a processor
    lst = mdl.processors
    assert len(lst) == 1
    for i in procs[:1]:
        assert len(lst[i.model[0]].model) == 4
        assert i.model is not lst[i.model[0]].model
        assert lst[i.model[0]].model[2].__class__.__name__ == '_Hairpin'
        assert lst[i.model[0]].model[-1].__class__.__name__ == 'DataFrameTask'
        assert lst[i.model[0]].model[-1].measures == {'peaks': {'all': True, 'events': True}}
def newtasks(mdl, beads=None, withhp=False):
    "add a list of tasks to the model"
    tasks = [
        TrackReaderTask(path=utpath("big_legacy")),
        DataCleaningTask(),
        ClippingTask(),
        ExtremumAlignmentTask(),
        EventDetectionTask(),
        PeakSelectorTask(),
    ]
    if beads:
        # restrict processing to the requested beads, right after the reader
        tasks.insert(1, DataSelectionTask(selected=list(beads)))
    if withhp:
        # NOTE(review): keyword is `sequences` here vs `sequence` elsewhere — confirm
        tasks.append(FitToHairpinTask(sequences=utpath("hairpins.fasta"),
                                      oligos="kmer"))
    mdl.tasks.tasks.tasks.add(create(tasks))
def test_diskcache_clear(tmp_path):
    "test clearing the disk cache, completely and per-processor"
    reader = TrackReaderTask(path=utpath("big_legacy"))
    tasks = [
        create(reader, UndersamplingTask(), DataFrameTask()),
        create(reader, DataFrameTask())
    ]
    for i, procs in enumerate(tasks):
        procs.data.setcache(DataFrameTask, {'index': i})

    cnf = DiskCacheConfig(path=str(tmp_path / "cache"))
    assert not Path(cnf.path).exists()
    cnf.insert(tasks, 10001)
    assert Path(cnf.path).exists()

    cnf.clear(complete=True)
    with DiskCache(cnf.path) as cache:
        # a complete clear leaves only the version marker behind
        assert sum(1 for _ in cache.iterkeys()) == 1
        assert cache.get(VERSION_KEY) == VERSION

    for procs in tasks:
        procs.data.setcache(DataFrameTask, {'index': -1})
    cnf.insert(tasks, 10001)
    assert Path(cnf.path).exists()

    # update must restore the stored (-1) values over the in-memory (-2) ones
    for procs in tasks:
        procs.data.setcache(DataFrameTask, {'index': -2})
    cnf.update(tasks, 10001)
    assert tasks[0].data.getcache(DataFrameTask)()['index'] == -1
    assert tasks[1].data.getcache(DataFrameTask)()['index'] == -1

    # after a per-processor clear, update leaves the in-memory caches untouched
    for procs in tasks:
        procs.data.setcache(DataFrameTask, {'index': -2})
    cnf.clear(processors=tasks)
    assert Path(cnf.path).exists()
    cnf.update(tasks, 10001)
    assert tasks[0].data.getcache(DataFrameTask)()['index'] == -2
    assert tasks[1].data.getcache(DataFrameTask)()['index'] == -2
def test_diskcache_insert(tmp_path):
    "test inserting into & reading back from the disk cache"
    reader = TrackReaderTask(path=utpath("big_legacy"))
    tasks = [
        create(reader, UndersamplingTask(), DataFrameTask()),
        create(reader, DataFrameTask())
    ]
    for i, procs in enumerate(tasks):
        procs.data.setcache(DataFrameTask, {'index': i})

    cnf = DiskCacheConfig(path=str(tmp_path / "cache"))
    cnf.clear()
    cnf.insert(tasks, 10001)
    assert cnf.get(tasks[0], 10001)['index'] == 0
    assert cnf.get(tasks[1], 10001)['index'] == 1

    # a second insert under another key must not overwrite the first
    for i, procs in enumerate(tasks):
        procs.data.setcache(DataFrameTask, {'index': i * 2 + 1})
    cnf.insert(tasks, 10002)
    assert cnf.get(tasks[0], 10002)['index'] == 1
    assert cnf.get(tasks[1], 10002)['index'] == 3

    # fresh controllers: update pulls the cached values back per key
    tasks = [
        create(reader, UndersamplingTask(), DataFrameTask()),
        create(reader, DataFrameTask())
    ]
    for procs in tasks:
        procs.data.setcache(DataFrameTask, {'index': -1})
    cnf.update(tasks, 10001)
    assert tasks[0].data.getcache(DataFrameTask)()['index'] == -1
    assert tasks[1].data.getcache(DataFrameTask)()['index'] == -1
    cnf.update(tasks, 10002)
    assert tasks[0].data.getcache(DataFrameTask)()['index'] == 1
    assert tasks[1].data.getcache(DataFrameTask)()['index'] == 3