def trackdataframe(self,
                   *tasks: Union[Tasks, Task],
                   process: bool = True,
                   assign: Optional[Dict[str, Callable]] = None,
                   transform: Union[Iterable[Callable], Callable, None] = None,
                   tdtransform: Union[bool, None, Callable] = True,
                   **kwa) -> pd.DataFrame:
    """
    Tasks are applied to each track, with the last one being a DataFrameTask
    constructed using all other keywords.

    Parameters
    ----------
    *tasks:
        The tasks to apply to the tracks prior to taking the dataframe. If the
        last one is a dataframe task, its `transform` and `measures` are merged
        with the keywords provided here. When no other task remains,
        `Tasks.alignment` is used.
    process:
        Whether to run all tracks or simply return the `Parallel` instance.
    assign:
        Other columns to add to the dataframe after computations. These are
        applied through `pd.DataFrame.assign`, before the `transform` items.
    transform:
        An attribute of the DataFrameTask: a callable, an iterable of
        callables or `None`.
    tdtransform:
        Method for transforming the full dataframe (all tasks). If `True`,
        the default method `defaulttdtransform` is used. If it is a callable,
        that callable is applied to the output. Any other value leaves the
        output untouched.
    """
    def _aslist(item) -> List[Callable]:
        "normalizes a callable, an iterable of callables or None to a list"
        if callable(item):
            return [item]
        return [] if item is None else list(item)

    # Both `assign` and `transform` must be kept. A conditional expression
    # binds more loosely than `+`, so the two lists are built separately.
    assigned: List[Callable] = (
        [partial(pd.DataFrame.assign, **assign)] if assign else []
    )
    transform = assigned + _aslist(transform)

    if tasks and Tasks(tasks[-1]) is Tasks.dataframe:
        transform = _aslist(tasks[-1].transform) + transform
        kwa = dict(tasks[-1].measures, **kwa)
        tasks = tasks[:-1]

    # Checked *after* removing a trailing dataframe task so that `tasks[0]`
    # below is always available.
    if not tasks:
        tasks = (Tasks.alignment, )

    tasklist = [
        [
            Tasks.trackreader(path=j.path, key=j.key),
            *Tasks.defaulttasklist(j, tasks[0], j.cleaned)[:-1],
            *(Tasks.create(i, instrument=j.path) for i in tasks),
            Tasks.dataframe(merge=True, measures=kwa, transform=transform)
        ]
        for j in self.values()
    ]

    procs = register(SafeDataFrameProcessor, cache=register(), recursive=False)
    par = Parallel()
    for track, tlist in zip(self.values(), tasklist):
        # the track itself replaces the track reader task
        par.extend([track], *tlist[1:], processors=procs)

    if not process:
        return par

    out = par.process(None, 'concat')
    if tdtransform is True:
        return defaulttdtransform(tasklist, out)
    return tdtransform(out) if callable(tdtransform) else out
def beadsview(self, track: Union[Track, Beads], *tasks) -> Beads:
    """
    Return the beads view.

    A `Track` has ramp cleaning, alignment and the extra `tasks` applied to
    it, in that order. Any other input is returned as is.
    """
    if not isinstance(track, Track):
        return cast(Beads, track)

    instrument = track.instrument['type']
    return track.apply(
        Tasks.rampcleaning(**self.rampcleaning, instrument=instrument),
        Tasks.alignment(**self.rampcleaning, instrument=instrument),
        *tasks,
    )
def dataframe(  # pylint: disable=arguments-differ
        self,
        *tasks: Union[Tasks, Task],
        transform: Optional[Callable] = None,
        assign: Optional[Dict[str, Callable]] = None,
        sequence: Union[str, Path, None, Dict[str, str]] = None,
        oligos: Union[str, Iterable[str], None] = None,
        **kwa):
    """
    Concatenates all dataframes obtained through *track.peaks.dataframe*

    See documentation in *track.peaks.dataframe* for other options.

    The keywords `sequences` and `oligo` are accepted as fallbacks for
    `sequence` and `oligos`. When a sequence is provided, a hairpin fit task
    is appended to `tasks`. When a reference track is set, a fit-to-reference
    task is inserted unless one is already in `tasks`, and the reference
    track is left out of the output.
    """
    # fallback keyword names
    if sequence is None:
        sequence = kwa.pop("sequences", None)
    if oligos is None:
        oligos = kwa.pop("oligo", None)

    if sequence:
        fitnames = {
            'fit', 'constraints', 'match',
            'pullphaseratio', 'singlestrand', 'baseline'
        }
        # keywords meant for the hairpin fit are removed from `kwa`
        fitkwa = {name: kwa.pop(name) for name in fitnames & set(kwa)}
        hairpin = Tasks.fittohairpin(
            sequences=sequence,
            oligos='kmer' if oligos is None else oligos,
            **fitkwa
        )
        tasks = (*tasks, hairpin)

    tracks = self._dictview()
    if self._reference is not None:
        hasreffit = any(Tasks(i) == Tasks.fittoreference for i in tasks)
        if not hasreffit:
            pks = self._items[self._reference].peaks
            if self._beads:
                pks = pks[list(self._beads)]

            if tasks and isinstance(tasks[-1], Tasks.fittohairpin().__class__):
                # the hairpin fit must remain the last task
                tasks = (*tasks[:-1], Tasks.fittoreference(peaks=pks), tasks[-1])
            else:
                tasks = (*tasks, Tasks.fittoreference(peaks=pks))

        if self._reference in tracks:
            tracks = tracks[f'~{self._reference}']

    return tracks.dataframe(
        Tasks.peakselector, *tasks,
        transform=transform,
        assign=assign,
        **kwa
    )
def _ssitems(self):
    """
    Return the items, passed through the single-strand processor when
    `_singlestrand` is set.

    A value of `True` selects the default `Tasks.singlestrand()` task; any
    other truthy value is used as the task itself.
    """
    if not self._singlestrand:
        return self._items

    if self._singlestrand is True:
        sstrand = Tasks.singlestrand()
    else:
        sstrand = self._singlestrand
    return SingleStrandProcessor.apply(self._items[...], **sstrand.config())
class BeadSubtractionDescriptor:
    "A descriptor for adding subtracted beads"
    # key under which the subtraction task is stored in `inst.tasks`
    NAME = Tasks(BeadSubtractionTask).name
    # the task's documentation replaces the docstring above
    __doc__ = BeadSubtractionTask.__doc__

    def __get__(
            self, inst, owner
    ) -> Union['BeadSubtractionDescriptor', Optional[BeadSubtractionTask]]:
        "returns the descriptor on class access, else the stored task or None"
        return self if inst is None else inst.tasks.get(self.NAME, None)

    def __delete__(self, inst):
        "removes the subtraction task, if any"
        inst.tasks.pop(self.NAME, None)

    def __set__(self,
                inst,
                beads: Union[None, Dict[str, Any], BeadSubtractionTask, Sequence[int]]):
        """
        Sets, updates or removes the subtraction task.

        * a falsy `dict` or task removes the task,
        * a non-empty `dict` provides the keywords for a new task,
        * a `BeadSubtractionTask` is stored as is,
        * `None`, an empty sequence or a falsy non-integer scalar removes the
          task,
        * an integer (including bead `0`) or a sequence of integers becomes
          the `beads` attribute of the current task, which is created if
          missing.
        """
        tpe = BeadSubtractionTask
        if isinstance(beads, (dict, tpe)):
            if not beads:
                inst.tasks.pop(self.NAME, None)
            elif isinstance(beads, dict):
                inst.tasks[self.NAME] = tpe(**beads)
            else:
                inst.tasks[self.NAME] = beads
            return

        lst: list
        if isinstance(beads, (int, np.integer)) and not isinstance(beads, bool):
            # An integer is always a bead id. Bead 0 is a valid bead and must
            # not be mistaken for a request to clear the task.
            lst = [cast(int, beads)]
        elif beads is None:
            lst = []
        elif np.isscalar(beads):
            # other scalars keep their truthiness semantics (False clears)
            lst = [cast(int, beads)] if beads else []
        else:
            # Listing first avoids evaluating the truth value of the
            # container itself, which fails for multi-element numpy arrays.
            lst = list(cast(Sequence[int], beads))

        if lst:
            inst.tasks.setdefault(self.NAME, tpe()).beads = lst
        else:
            inst.tasks.pop(self.NAME, None)
def _setupref(self):
    """
    Return a copy of the reference task with fit data for every available bead.

    Beads considered are those listed in the base data and available in the
    reference track's peaks, but missing from the current fit data. Their fit
    data is computed with `_frompeaksfcn`, in a process pool when there are
    more than 2 beads and serially otherwise. `self._reftask` itself is left
    untouched.
    """
    reftask = cast(FitToReferenceTask, deepcopy(self._reftask))

    beads = set(self._base()[1]['bead'])
    beads &= set(self._items[self._reference].peaks.keys())
    beads -= set(self._reftask.fitdata)

    data = dict(reftask.fitdata)
    sstrand = (
        Tasks.singlestrand() if self._singlestrand in (True, None) else
        None if not self._singlestrand else
        self._singlestrand
    )
    ref = self._items[self._reference].peaks

    args = [(sstrand, reftask.fitalg, ref, i) for i in beads]
    if len(beads) > 2:
        with ProcessPoolExecutor() as pool:
            # results are collected before the pool is shut down
            found = list(pool.map(self._frompeaksfcn, args))
    else:
        # Not worth starting a pool. The results must still be kept, exactly
        # as in the parallel branch.
        found = [self._frompeaksfcn(i) for i in args]

    data.update({i: j for i, j in found if j is not None})
    reftask.fitdata = data
    return reftask
def __cleaning__(cls): ret = cls.scriptingmodel("cleaning") if ret is None: ret = Tasks.__base_cleaning__() if cls.scriptingmodel("alignalways"): # Remove alignment as it is not an optional task. # It will be added back in __tasklist__ ret = tuple(i for i in ret if i is not Tasks.alignment) return ret
def _fit(self, tpe, sequence, oligos, kwa):
    """
    Computes hairpin fits.

    `sequence` and `oligos`, when not `None`, are stored in `kwa` under
    'sequences' and 'oligos'. The task named `tpe` is then created from `kwa`
    and applied after the default task list leading to peak selection.

    Raises `IndexError` when the task has no fit and its oligos are not one
    of the automatic '3mer', 'kmer', '4mer' or '5mer' settings.
    """
    for name, value in (('sequences', sequence), ('oligos', oligos)):
        if value is not None:
            kwa[name] = value

    last = getattr(Tasks, tpe)(**kwa)
    if not (last.fit or last.oligos in ['3mer', 'kmer', '4mer', '5mer']):
        raise IndexError('No fit found')

    prior = Tasks.defaulttasklist(self, Tasks.peakselector)
    return self.apply(*prior, last)
def getredim(self):
    """
    Returns the method used by the dynamic map.

    The parent's values are converted to a list of items when they come as a
    `dict`. In '2d' format the 'key' entry is removed. With no format and a
    fit requested, 'stretch' and 'bias' slices are appended, taken from the
    "fittoreferencerange" entry of the scripting model.
    """
    redim = super().getredim()
    if isinstance(redim, dict):
        redim = list(redim.items())

    if self._format == '2d':
        redim = [item for item in redim if item[0] != 'key']

    if self._format is None and self._fit:
        rngs = Tasks.scriptingmodel("fittoreferencerange")  # type: ignore
        redim += [(name, slice(*rngs[name])) for name in ('stretch', 'bias')]
    return redim
def dataframe(
        self,
        track: Union[Track, Beads],
        beadlist: Optional[List[int]] = None,
        **kwa
) -> pd.DataFrame:
    """
    Return a dataframe containing all info.

    The list of tasks describing how the dataframe was obtained is stored on
    the dataframe as `tasklist`, and that name is registered in the
    dataframe's `_metadata`.
    """
    beads = self.__beads(track, beadlist)
    dframe = RampDataFrameProcessor.dataframe(
        beads, **dict(self.dataframetask, **kwa)
    )

    instrument = track.instrument['type']
    dframe.__dict__['tasklist'] = [
        Tasks.trackreader(path=track.path),
        Tasks.rampcleaning(**self.rampcleaning, instrument=instrument),
        Tasks.alignment(**self.rampcleaning, instrument=instrument),
        RampStatsTask(**dict(self.dataframetask, **kwa))
    ]

    metadata = getattr(dframe, '_metadata')
    if 'tasklist' not in metadata:
        metadata.append('tasklist')
    return dframe
def fittoreference(self, task: FitToReferenceTask = None, **kwa) -> FitToReferenceDict:
    """
    Computes fits to a reference.

    Either a ready-made `task` or the keyword arguments for creating a
    `FitToReferenceTask` may be provided. Providing both raises
    `NotImplementedError`.
    """
    if task is not None and kwa:
        raise NotImplementedError()

    prior = Tasks.defaulttasklist(self, Tasks.peakselector)
    if not isinstance(task, FitToReferenceTask):
        task = FitToReferenceTask(**kwa)
    return self.apply(*prior, task)
def rescaletobead(self, bead) -> Track:
    """
    Rescales elements to the current bead.

    Works on a shallow copy of the loaded track. Rescaled tasks provided by
    the tasks model for the track's instrument type replace the matching
    attributes of the copy's `tasks`, and the 'rescaling' entry of the copy's
    `instrument` is updated. Without rescaled tasks for that instrument, the
    'rescaling' entry is removed instead.
    """
    self._trk.load()
    trk = shallowcopy(self._trk)
    rescaled = getattr(Tasks.tasksmodel(), 'rescale')(trk, bead)
    instr = trk.instrument['type'].value

    if instr not in rescaled:
        trk.instrument.pop('rescaling', None)
        return trk

    # reversed conversion table: from values back to their keys
    names = {val: key for key, val in getattr(Tasks, '_cnv')(None).items()}
    for key, task in rescaled[instr].items():
        if task.zscaledattributes() == ():
            # nothing depends on the scale in this task
            continue

        attr = names.get(key, key)
        if hasattr(trk.tasks, attr):
            setattr(trk.tasks, attr, task)

    trk.instrument['rescaling'] = rescaled['rescaling'][instr]
    return trk
def _default_kargs(self, key, bead, kwa):
    """
    Completes the default keywords with the reference task.

    Nothing is added when there is no reference, when `key` is the reference
    or when there is no reference task. Otherwise the task is stored in `kwa`
    under 'reftask' and, if `bead` is not yet in it, the task is updated from
    the reference peaks for that bead. Those peaks are first passed through
    the single-strand processor unless `_singlestrand` is `False`.
    """
    super()._default_kargs(key, bead, kwa)
    if (
            self._reference is None
            or self._reference == key
            or self._reftask is None
    ):
        return

    kwa['reftask'] = self._reftask
    if bead in self._reftask:
        return

    pks = self._items[self._reference].peaks
    if self._singlestrand is not False:
        sstrand = (
            self._singlestrand
            if isinstance(self._singlestrand, SingleStrandTask) else
            Tasks.singlestrand()
        )
        pks = SingleStrandProcessor.apply(pks[...], **sstrand.config())
    self._reftask.frompeaks(pks[bead, ...])
def __getitem__(self, key):  # pylint: disable=too-many-return-statements
    """
    Item access with a number of shortcuts, tested in the following order:

    * a non-empty list of integers: returns a clone in which every track has
      its `tasks.selection` set to a selection task over those integers,
    * any other list, or an ellipsis: delegated to the parent class,
    * a `Task` or `Tasks`: same as `self.apply(key)`,
    * a tuple of `Task` or `Tasks` only: same as `self.apply(*key)`,
    * any other tuple: its items are applied one after the other as keys on
      a clone, skipping ellipsis,
    * an object with a callable `match` attribute, or 'clean' / '~clean' when
      these are not existing keys: same as `self.select(key)`,
    * a key which is not in `self`, contains 'w' or 's' (any case) and is
      an integer once these letters are removed: same as `self.select(key)`,
    * anything else: the parent's item, with `self.tasks` applied to it when
      there are any.
    """
    # selecting beads
    if isinstance(key, list) and key and all(
            isinstance(i, int) for i in key):
        tracks = self.clone()
        sel = Tasks.selection(selected=list(key))
        for i in tracks.values():
            i.tasks.selection = sel
        return tracks

    if isinstance(key, list) or isellipsis(key):
        return super().__getitem__(key)

    # applying tasks
    if isinstance(key, (Task, Tasks)):
        return self.apply(key)

    if isinstance(key, tuple) and all(
            isinstance(i, (Task, Tasks)) for i in key):
        return self.apply(*key)

    # chaining keys: each item is looked up on the result of the previous one
    if isinstance(key, tuple):
        tracks = self.clone()
        for i in key:
            if not isellipsis(i):
                tracks = tracks[i]
        return tracks

    # selecting tracks
    if (callable(getattr(key, 'match', None))
            or (key in ('clean', '~clean') and key not in self)):
        return self.select(key)

    if key not in self and ('w' in key.lower() or 's' in key.lower()):
        try:
            # only checks that the remainder is an integer
            int(key.lower().replace('w', '').replace('s', ''))
        except ValueError:
            # not such a key: fall back to a normal lookup
            pass
        else:
            return self.select(key)

    trk = super().__getitem__(key)
    return trk.apply(*self.tasks) if self.tasks else trk
def showfalsepositives(itms, rng, precision=1, scatter=False, **kwa):
    """
    Display false positives.

    Parameters
    ----------
    itms:
        The items passed to `FalsePositivesIdentifier.falsepositives`.
    rng:
        A pair of values: the range displayed as a thick line on the detailed
        plot and, when `scatter` is set, as 2 points per bead on the scatter
        plot.
    precision:
        Passed on to `FalsePositivesIdentifier.detailed`.
    scatter:
        Whether to add a scatter plot of 'z' versus 'bead' below the detailed
        plot.
    **kwa:
        Keywords for creating the peak selection task.

    Returns
    -------
    A dynamic map with one display per track.
    """
    cls = FalsePositivesIdentifier
    fpos = cls.falsepositives(itms)
    tracks = sorted(fpos.track.unique())
    dico = PeaksDict(config=cast(PeakSelectorTask, Tasks.peakselector(**kwa)))

    def _showfp(track):
        data = fpos[fpos.track == track]
        beads = data.bead.unique()
        dtl = cls.detailed(dico.config, data, precision=precision)
        disp = getattr(Detailed(dico, dtl), 'display')(zero=False).display()
        crv = hv.Curve((list(rng), [3, 3])).options(linewidth=20, alpha=.5)
        ovr = hv.Overlay(list(disp) + [crv])
        if not scatter:
            return ovr

        # Each bead gets one point per bound of the range. The beads are
        # listed twice, so the values must be the lower bound and then the
        # upper bound.
        bounds = hv.Scatter((
            np.concatenate([beads] * 2),
            [rng[0]] * len(beads) + [rng[1]] * len(beads)
        ))
        scatt = hv.Scatter(data, 'bead', 'z').options(jitter=.8) * bounds
        return (ovr + scatt).cols(1)

    return hv.DynamicMap(_showfp, kdims=['track']).redim.values(track=tracks)
def subtraction(self, beads = None, **kwa) -> Optional[Beads]:
    """
    Displays aligned cycles for subtracted beads only.

    Without `beads`, those of the track's subtraction task are used or, when
    there is no such task, the result of `track.cleaning.fixed(**kwa)`.
    Returns `None` when no beads are found. In the returned data, the key
    `-1` holds the signal computed by the subtraction processor.
    """
    current = self.track.tasks.subtraction
    if beads is None and current is None:
        beads = self.track.cleaning.fixed(**kwa)

    if beads is None:
        beads = getattr(current, 'beads', None)
        if not beads:
            return None
        config = current.config()
    elif current is None:
        config = cast(Task, Tasks.subtraction(beads = beads)).config()
    else:
        # keep the current settings but for the beads
        config = current.config()
        config['beads'] = beads
    config.update(**kwa)

    processor = Tasks.subtraction.processor(**config)
    data = {ibead: self.track.data[ibead] for ibead in beads}
    data[-1] = processor.signal(self.track.beads)  # type: ignore
    aligned = self.track.apply(Tasks.alignment)
    return aligned.withdata(data)
def process(self,
            beads: Sequence[int] = None,
            **kwa) -> Dict[int, Optional[DataCleaningErrorMessage]]:
    """
    Returns a dictionary of cleaning results.

    Keys are the first element of each item iterated over. Values are `None`
    or the first argument of whatever `DataCleaningProcessor.compute`
    returned. The track's subtraction task, if any, is applied first. The
    cleaning settings are those of the track's cleaning task, or of a default
    one for the track's instrument, updated with `kwa`.
    """
    def _message(result):
        "extracts the first argument of a result, if there is one"
        return result if result is None else result.args[0]

    itms = self.track.beads
    sub = self.track.tasks.subtraction  # type: ignore
    if sub is not None:
        cache: dict = {}
        itms = BeadSubtractionProcessor.apply(itms, cache = cache, **sub.config())

    if beads:
        itms = itms[list(beads)]

    cleaning = self.track.tasks.cleaning  # type: ignore
    if cleaning is None:
        # use the default settings for this track
        cleaning = Tasks.cleaning(instrument = self.track.instrument['type'])

    config = cleaning.config()
    config.update(kwa)

    out = {}
    for info in cast(Iterator, itms):
        out[info[0]] = _message(DataCleaningProcessor.compute(itms, info, **config))
    return out
def getredim(self):
    """
    Returns the method used by the dynamic map.

    The values from `_PeaksDisplay.getredim` are completed with:

    * the sorted sequence names,
    * the 'stretch' and 'bias' attributes which differ from the class
      defaults,
    * slices for the remaining 'stretch' and 'bias', taken from the
      "fittohairpinrange" entry of the scripting model and adjusted by the
      track's 'rescaling' coefficient when there is one.
    """
    values = list(_PeaksDisplay.getredim(self))

    # attributes which were changed from their class-level value
    params = []
    for name in ('stretch', 'bias'):
        attr = '_' + name
        if getattr(self, attr) != getattr(self.__class__, attr):
            params.append((name, getattr(self, attr)))

    rngs = Tasks.scriptingmodel("fittohairpinrange")  # type: ignore
    instrument = getattr(getattr(self._items, 'track', None), 'instrument', ())
    if 'rescaling' in instrument:
        coeff = float(self._items.track.instrument['rescaling'])
        rngs = {
            'stretch': tuple(val / coeff for val in rngs['stretch']),
            'bias': tuple(val * coeff for val in rngs['bias'])
        }

    pins = sequences.peaks(self._sequence, self._oligos)
    if isinstance(pins, np.ndarray):
        # a single, unnamed sequence
        pins = {'hairpin 1': None}

    values.append(('sequence', sorted(dict(pins))))
    values.extend(params)

    fixed = {name for name, _ in params}
    for name in ('stretch', 'bias'):
        if name not in fixed:
            values.append((name, slice(*rngs[name])))
    return values
def peaks(self) -> PeaksDict:
    """
    Returns peaks found, applying the default task list up to the peak
    selector.
    """
    tasklist = Tasks.defaulttasklist(self, Tasks.peakselector)
    return self.apply(*tasklist)
def events(self) -> Events:
    """
    Returns events in phase 5 only, applying the default task list up to
    event detection.
    """
    tasklist = Tasks.defaulttasklist(self, Tasks.eventdetection)
    return self.apply(*tasklist)
def cleanbeads(self) -> Beads:
    """
    Return cleaned beads, applying the default task list up to alignment.
    """
    tasklist = Tasks.defaulttasklist(self, Tasks.alignment)
    cleaned = self.apply(*tasklist)
    return cast(Beads, cleaned)
def processors(self, *args, copy=True):
    """
    Returns an iterator over the result of provided tasks.

    `self` is set as the default cache of the first item, and `copy` is
    stored on the returned object.
    """
    out = Tasks.processors(self, *args)
    out.data.setcachedefault(0, self)
    out.copy = copy
    return out
def tasklist(self, *args):
    """
    Creates a tasklist from this object's path and the provided arguments.
    """
    created = Tasks.tasklist(self.path, *args)
    return created