def test_dataframe():
    """
    Check the dataframes produced downstream of the peak selection.

    The same pipeline (event detection, peak selection, `DataFrameTask`
    with `merge=True`) is run three times on the 'big_selected' test file:

    * with `measures=dict(dfevents=True)`: an 'events' column is expected
      and its first item must itself be a dataframe,
    * without measures: the index must not have a 'cycle' level,
    * with `measures=dict(events=True)`: the index must have a 'cycle' level.
    """
    def _firstframe(**options):
        "runs the pipeline once and returns the first item it produces"
        path = utfilepath('big_selected')
        tasks = (
            EventDetectionTask(),
            PeakSelectorTask(),
            DataFrameTask(merge=True, **options),
        )
        return next(create(path, *tasks).run())

    # events stored as one dataframe per row
    frame = _firstframe(measures=dict(dfevents=True))
    assert isinstance(frame, pd.DataFrame)
    for level in ('track', 'bead'):
        assert level in frame.index.names
    for column in ('peakposition', 'events'):
        assert column in frame
    assert isinstance(frame.events[0], pd.DataFrame)

    # default measures: no per-cycle information
    frame = _firstframe()
    assert isinstance(frame, pd.DataFrame)
    for level in ('track', 'bead'):
        assert level in frame.index.names
    assert 'cycle' not in frame.index.names
    assert 'peakposition' in frame

    # events requested: the index gains a 'cycle' level
    frame = _firstframe(measures=dict(events=True))
    assert isinstance(frame, pd.DataFrame)
    for level in ('track', 'bead', 'cycle'):
        assert level in frame.index.names
    assert 'peakposition' in frame
def test_control():
    """
    Check the task controller on simulated events, then on a real file.

    Events are simulated for 2 beads and 20 cycles around known peak
    positions. The peaks found on the first bead must match `0.` followed by
    the simulated positions, and each peak must hold, per cycle, events whose
    data stays within 0.1 of the peak position.
    """
    positions = [1., 5., 10., 20.]
    simulator = EventSimulatorTask(
        peaks=positions,
        brownian=.01,
        stretch=None,
        bias=None,
        rates=None,
        baselineargs=None,
        nbeads=2,
        ncycles=20,
    )
    pipeline = create((simulator, PeakSelectorTask()))

    output = tuple(tuple(item) for item in pipeline.run())[0]
    assert tuple(item[0] for item in output) == (0, 1)

    found = tuple(output[0][1])
    assert_allclose(
        [pos for pos, _ in found],
        [0.] + positions,
        atol=.01,
        rtol=1e-2,
    )

    for position, events in found:
        assert events.dtype == 'O'
        assert all(evt.dtype == EVENTS_DTYPE for evt in events)

        expected = (position, ) * 20
        lows = [evt[0]['data'].min() for evt in events]
        assert_allclose(lows, expected, atol=0.1)

        highs = [evt[0]['data'].max() for evt in events]
        assert_allclose(highs, expected, atol=0.1)

    # test that things don't crash
    pipeline = create(
        utfilepath('big_selected'),
        EventDetectionTask(),
        PeakSelectorTask(),
    )
    output = tuple(next(pipeline.run())[0])
def test_toref_frompeaks():
    """
    Check the fits against a reference built from peaks.

    For both `HistogramFit` and `ReferencePeaksFit`, a reference is created
    with `frompeaks`, a copy of it is distorted by a factor 1.01 and an
    offset, and `optimize` must return those two values as `ret[1:]`.
    """
    def _firstbeadpeaks():
        "runs the pipeline and returns the peaks of the first bead"
        pipeline = create(
            utpath("big_selected"),
            EventDetectionTask(),
            PeakSelectorTask(),
        )
        perbead = {
            bead: tuple(peaks)
            for bead, peaks in next(iter(pipeline.run()))
        }
        return next(iter(perbead.values()))

    # histogram-based fit: shift the minimum value, shrink the bin width
    peaks = _firstbeadpeaks()
    ref = HistogramFit().frompeaks(peaks)
    distorted = HistogramData(
        ref.histogram,
        ref.minvalue+.01,
        ref.binwidth/1.01,
    )
    fitted = HistogramFit().optimize(ref, distorted)
    assert_allclose(fitted[1:], [1.01, .01], rtol=5e-4, atol=5e-4)

    # peak-based fit: rescale and shift the reference directly
    peaks = _firstbeadpeaks()
    ref = ReferencePeaksFit().frompeaks(peaks)
    fitted = ReferencePeaksFit().optimize(ref, ref/1.01+0.1)
    assert_allclose(fitted[1:], [1.01, .1], rtol=5e-4, atol=1e-2)
def test_rescaling():
    """
    Check how tasks react to `rescale(5.)`.

    For `SingleStrandTask` and `BaselinePeakTask`, a new object must be
    returned in which 'delta' and 'maxdisttozero' are multiplied by 5 and
    every other attribute is left equal. For `PeakSelectorTask`, a new but
    equal object must be returned.
    """
    scaled = ("delta", "maxdisttozero")
    for tasktype in (SingleStrandTask, BaselinePeakTask):
        original = tasktype()
        rescaled = original.rescale(5.)
        assert rescaled is not original

        for name, value in original.__dict__.items():
            if name not in scaled:
                assert value == rescaled.__dict__[name]
            else:
                assert abs(value * 5 - rescaled.__dict__[name]) < 1e-5

    original = PeakSelectorTask()
    rescaled = original.rescale(5.)
    assert rescaled is not original
    assert rescaled == original
def test_gels():
    """
    Check the GELS alignment of peaks.

    First, the full pipeline ending with a `GELSPeakAlignmentTask` is run on
    a simulated track to make sure nothing crashes. Then, the task is called
    directly on a hand-made array of statistics (4 rows of 10 items) and its
    output is compared to stored values.
    """
    simulated = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    track = Track(**simulated)
    tasks = (
        InMemoryTrackTask(track),
        EventDetectionTask(),
        PeakSelectorTask(peakalign=None),
        GELSPeakAlignmentTask(),
    )
    peaks = next(create(*tasks).run())
    _ = peaks[0]  # test everything runs

    dtype = GELSPeakAlignmentTask.DTYPE
    cycles = np.array(
        [(1. if i > 5 else 0., 0.) for i in range(10)],
        dtype=dtype,
    )
    stats = np.array([np.roll(cycles, i) for i in range(4)], dtype=dtype)

    # each row gets its own offset on the 'mean' field
    for irow in range(4):
        stats[irow, :]['mean'][:] += irow * 10

    # each column is then shifted by a median-centered ramp
    shifts = np.arange(10, dtype='f4') * .1
    shifts -= np.median(shifts)
    for icol in range(10):
        stats[:, icol]['mean'][:] -= shifts[icol]

    found = tasks[-1](stats)
    expected = np.array(
        [
            -0.47142908, -0.37142903, -0.27142864, 0., 0., 0.,
            0.12857169, 0.22857153, 0.32857174, 0.4285718
        ],
        dtype='f4',
    )
    assert_allclose(found, expected)
def test_minbiasalignment():
    """
    Check the min-bias alignment of peaks.

    First, the full pipeline ending with a `MinBiasPeakAlignmentTask` is run
    on a simulated track to make sure nothing crashes. Then, the task is
    called directly on a hand-made array of statistics (4 rows of 10 items)
    and its output is compared to stored values.
    """
    simulated = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    track = Track(**simulated)
    tasks = (
        InMemoryTrackTask(track),
        EventDetectionTask(),
        PeakSelectorTask(peakalign=None),
        MinBiasPeakAlignmentTask(),
    )
    peaks = next(create(*tasks).run())
    _ = peaks[0]  # test everything runs

    dtype = MinBiasPeakAlignmentTask.DTYPE
    cycles = np.array(
        [(1. if i > 5 else 0., 0.) for i in range(10)],
        dtype=dtype,
    )
    stats = np.array([np.roll(cycles, i) for i in range(4)], dtype=dtype)

    # each row gets its own offset on the 'mean' field
    for irow in range(4):
        stats[irow, :]['mean'][:] += irow * 10

    # each column is then shifted by a median-centered ramp
    shifts = np.arange(10, dtype='f4') * .1
    shifts -= np.median(shifts)
    for icol in range(10):
        stats[:, icol]['mean'][:] -= shifts[icol]

    found = tasks[-1](stats)
    expected = np.array(
        [
            -0.44999883, -0.34998798, -0.24997711, 0., 0., 0., 0.,
            0.24997902, 0.34999132, 0.45000142
        ],
        dtype='f4',
    )
    assert_allclose(found, expected)
def test_precision():
    """
    Check that simulated peaks are recovered with a given precision.

    A track is simulated with 6 peaks (0.1 to 0.6) and the simulator is asked
    to store the 'sizes' it drew. After peak finding and
    `PeakProbabilityTask`, the test checks on bead 0:

    * the peak positions (a peak at 0 followed by the 6 simulated ones),
    * the hybridisation rates, against the fraction of stored sizes >= 5,
    * the average durations, within 2 of the mean of stored sizes >= 5.
    """
    poisson = dict(
        rates=[.05, .05, .1, .1, .2, .2],
        sizes=[20, 10, 20, 10, 20, 10],
        peaks=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
        store=['sizes'],
    )
    config = dict(
        durations=[15, 30, 15, 60, 60, 200, 15, 100],
        drift=None,
        baseline=None,
        framerate=1.,
        poisson=poisson,
        seed=0,
        nbeads=2,
        ncycles=100,
    )
    pipeline = create(
        TrackSimulatorTask(**config),
        EventDetectionTask(filter=NonLinearFilter()),
        PeakSelectorTask(),
        PeakProbabilityTask(),
    )

    frame = next(pipeline.run())
    sizes = frame.track.simulator[0]['sizes']
    found = tuple(frame[0])

    positions = np.array([pos for pos, _ in found])
    assert_allclose(
        positions,
        [0., .1, .2, .3, .4, .5, .6],
        rtol=1e-3,
        atol=1e-3,
    )

    # found[0] is the peak at zero: it has no simulated counterpart
    rates = np.sum(sizes >= 5, 0) / 100.  # type: ignore
    measured = np.array([prob.hybridisationrate for _, prob in found[1:]])
    assert_allclose(measured, rates, rtol=1e-3, atol=1e-3)

    durations = [
        np.mean(col[col >= 5]) for col in cast(np.ndarray, sizes).T
    ]
    measured = np.array([prob.averageduration for _, prob in found[1:]])
    assert np.all(np.abs(measured - durations) < 2)
def test_baselinepeak():
    """
    Check the effect of `BaselinePeakTask` on the peaks of a simulated track.

    The peak positions found on bead 0 with the task must be those found
    without it, minus the first one.
    """
    simulated = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    track = Track(**simulated)
    tasks = (
        InMemoryTrackTask(track),
        EventDetectionTask(),
        PeakSelectorTask(),
        BaselinePeakTask(),
    )

    withtask = [pos for pos, _ in next(create(*tasks).run())[0]]
    without = [pos for pos, _ in next(create(*tasks[:-1]).run())[0]]
    assert withtask == without[1:]
def test_hp_dataframe(record):
    """
    Check the dataframes created after fitting to hairpins.

    The pipeline (reader, event detection, peak selection, hairpin fit,
    dataframe) is run twice on the 'big_legacy' test file:

    * with `peaks` measures including missing peaks and `peakhfsigma`: the
      shape, index, columns and `tasklist` attribute are checked, and the
      first rows are compared to the `record` fixture,
    * with default measures: the shape, index, columns and `tasklist`
      attribute are checked.
    """
    def _firstframe(**options):
        "runs the pipeline once and returns the first item it produces"
        tasks = (
            TrackReaderTask(path=utpath("big_legacy")),
            EventDetectionTask(),
            PeakSelectorTask(),
            FitToHairpinTask(
                sequence=utpath("hairpins.fasta"),
                oligos="4mer",
                fit=ChiSquareFit(),
            ),
            DataFrameTask(merge=True, **options),
        )
        return next(iter(create(*tasks).run()))

    # with per-peak information, including missing peaks
    frame = _firstframe(
        measures=dict(peaks=dict(missing=True, peakhfsigma=True))
    )
    assert frame.shape == (102, 29)
    assert 'falseneg' in frame.peaks[1].status.unique()
    assert frame.index.names == ['hpin', 'track', 'bead']
    assert isinstance(frame.peaks.values[0], pd.DataFrame)
    assert 'modification' in frame.columns
    assert hasattr(frame, 'tasklist')

    head = frame.drop(columns=['peaks', 'cost', 'modification']).iloc[:5]
    assert record["withmissing"].approx(head, atol=5e-4)

    peakshead = frame.peaks[1].iloc[:5]
    assert record["peakswithmissing"].approx(peakshead, atol=5e-4)

    # with default measures
    frame = _firstframe()
    assert frame.shape == (102, 26)
    assert frame.index.names == ['hpin', 'track', 'bead']
    assert 'modification' in frame.columns
    assert hasattr(frame, 'tasklist')
def test_singlestrandpeak():
    """
    Check the single strand peak detection on a simulated track.

    Two things are verified on bead 0:

    * the non-closing ramps reported by `SingleStrandProcessor` are the
      items for which the simulated strand closing duration is at least
      `track.phase.duration(..., range(5))`,
    * the peak positions found with `SingleStrandTask` are those found
      without it, minus the last one.
    """
    simulated = Experiment(baseline=None, thermaldrift=None).track(seed=1)
    track = Track(**simulated)
    tasks = (
        InMemoryTrackTask(track),
        EventDetectionTask(),
        PeakSelectorTask(),
        SingleStrandTask(),
    )

    # the processor is called by hand on the output of the shorter pipeline
    peaks = next(create(*tasks[:-1]).run())
    nonclosing = SingleStrandProcessor().nonclosingramps(peaks, 0)

    closing = simulated['truth'][0].strandclosing.duration
    limit = track.phase.duration(..., range(5))
    expected = np.where(closing >= limit)[0]
    assert set(nonclosing) == set(expected)

    withtask = [pos for pos, _ in next(create(*tasks).run())[0]]
    without = [pos for pos, _ in next(create(*tasks[:-1]).run())[0]]
    assert withtask == without[:-1]
def newtasks(mdl, beads=None, withhp=False):
    """
    Add a list of tasks to the model.

    The list reads the 'big_legacy' test file, then cleans, clips, aligns,
    detects events and selects peaks.

    Parameters
    ----------
    mdl:
        the model; the tasks are added through `mdl.tasks.tasks.tasks.add`.
    beads:
        if truthy, a `DataSelectionTask` selecting these beads is placed
        right after the reader task.
    withhp:
        if truthy, a `FitToHairpinTask` is appended to the list.
    """
    tasks = [TrackReaderTask(path=utpath("big_legacy"))]

    # the optional bead selection always sits in second position
    if beads:
        tasks.append(DataSelectionTask(selected=list(beads)))

    tasks += [
        DataCleaningTask(),
        ClippingTask(),
        ExtremumAlignmentTask(),
        EventDetectionTask(),
        PeakSelectorTask(),
    ]

    if withhp:
        hairpins = FitToHairpinTask(
            sequences=utpath("hairpins.fasta"),
            oligos="kmer",
        )
        tasks.append(hairpins)

    mdl.tasks.tasks.tasks.add(create(tasks))
class PeakFindingBatchTemplate(BatchTemplate):
    """
    Template of tasks to run.

    Iterating over the template yields, in this order: the alignment task
    (only if truthy), every item in `drift`, then the detection and the
    peaks tasks. The iteration stops as soon as one of the latter two is
    `None`: no peaks task is yielded without a detection task.
    """
    # no alignment task by default
    alignment: Optional[ExtremumAlignmentTask] = None
    # tasks yielded between the alignment and the detection
    drift = [DriftTask(onbeads=True)]
    detection: Optional[EventDetectionTask] = EventDetectionTask()
    peaks: Optional[PeakSelectorTask] = PeakSelectorTask()

    # `locals()` holds the names bound so far in the class body: this
    # decorator must stay below the attributes declared above.
    # NOTE(review): presumably lets keyword arguments override these
    # class-level defaults -- confirm against `initdefaults`.
    @initdefaults(frozenset(locals()))
    def __init__(self, **kwa):
        super().__init__(**kwa)

    def __iter__(self) -> Iterator[Task]:
        if self.alignment:
            yield self.alignment

        yield from self.drift

        # both attributes are read at once, before yielding any of them
        detection, peaks = self.detection, self.peaks
        if detection is None:
            return
        yield detection

        if peaks is not None:
            yield peaks