def trackdataframe(self,
                   *tasks: Union[Tasks, Task],
                   process: bool = True,
                   assign: Optional[Dict[str, Callable]] = None,
                   transform: Union[Iterable[Callable], Callable,
                                    None] = None,
                   tdtransform: Union[bool, None, Callable] = True,
                   **kwa) -> pd.DataFrame:
        """
        Tasks are applied to each track, with the last one being a
        DataFrameTask constructed using all other keywords.

        Parameters
        ----------
        *tasks:
            The tasks to apply to the tracks prior to taking the dataframe.
            When the last one is a dataframe task, its `transform` and
            `measures` attributes are merged with the `transform` and `**kwa`
            arguments and the task itself is replaced by the one created
            here. Without any other task, `Tasks.alignment` is used.
        assign:
            Other columns to add to the dataframe after computations. They are
            applied through `pd.DataFrame.assign`, before any other `transform`.
        transform:
            A callable or an iterable of callables set as the `transform`
            attribute of the DataFrameTask
        process:
            Whether to run all tracks or simply return the `Parallel` instance.
        tdtransform:
            Method for transforming the full dataframe (all tasks). If `True`,
            `defaulttdtransform` is called with the task lists and the
            dataframe. If a callable, it is called with the dataframe. For any
            other value (`None`, `False`), the dataframe is returned as is.
        **kwa:
            The measures provided to the DataFrameTask.

        Returns
        -------
        The dataframe, or the `Parallel` instance when `process` is `False`.
        """
        # normalize `transform` to a list of callables
        transforms: List[Callable]
        if transform is None:
            transforms = []
        elif callable(transform):
            transforms = [transform]
        else:
            transforms = list(transform)

        # Column assignments come first, then the requested transformations.
        # They must be concatenated explicitly: `a if c else b + d` parses as
        # `a if c else (b + d)`, which dropped `transform` whenever `assign`
        # was provided.
        if assign:
            transforms = [partial(pd.DataFrame.assign, **assign)] + transforms

        if tasks and Tasks(tasks[-1]) is Tasks.dataframe:
            transforms = tasks[-1].transform + transforms
            kwa = dict(tasks[-1].measures, **kwa)
            tasks = tasks[:-1]

        # checked *after* removing a trailing dataframe task, as `tasks[0]` is
        # required below
        if not tasks:
            tasks = (Tasks.alignment, )

        tasklist = [[
            Tasks.trackreader(path=j.path, key=j.key),
            *Tasks.defaulttasklist(j, tasks[0], j.cleaned)[:-1],
            *(Tasks.create(i, instrument=j.path) for i in tasks),
            Tasks.dataframe(merge=True, measures=kwa, transform=transforms)
        ] for j in self.values()]

        procs = register(SafeDataFrameProcessor,
                         cache=register(),
                         recursive=False)
        par = Parallel()
        for track, tlist in zip(self.values(), tasklist):
            # the track itself stands in for the track reader task
            par.extend([track], *tlist[1:], processors=procs)

        if not process:
            return par

        out = par.process(None, 'concat')
        if tdtransform is True:
            return defaulttdtransform(tasklist, out)
        return tdtransform(out) if callable(tdtransform) else out
Example #2
0
 def beadsview(self, track: Union[Track, Beads], *tasks) -> Beads:
     """
     Return the beads view.

     A `Track` has the ramp cleaning and alignment tasks applied to it,
     both configured with `self.rampcleaning`, followed by `*tasks`.
     Anything else is returned unchanged.
     """
     if not isinstance(track, Track):
         return cast(Beads, track)

     instrument = track.instrument['type']
     return track.apply(
         Tasks.rampcleaning(**self.rampcleaning, instrument = instrument),
         Tasks.alignment(**self.rampcleaning, instrument = instrument),
         *tasks,
     )
Example #3
0
    def dataframe(  # pylint: disable=arguments-differ
            self,
            *tasks: Union[Tasks, Task],
            transform: Optional[Callable] = None,
            assign: Optional[Dict[str, Callable]] = None,
            sequence: Union[str, Path, None, Dict[str, str]] = None,
            oligos: Union[str, Iterable[str], None] = None,
            **kwa):
        """
        Concatenates all dataframes obtained through *track.peaks.dataframe*

        A hairpin fit task is appended to `tasks` when a sequence is
        provided (keywords `sequence` or `sequences`, oligos through `oligos`
        or `oligo`). When a reference track is set and no fit-to-reference
        task is provided, one is inserted, prior to a trailing hairpin fit.

        See documentation in *track.peaks.dataframe* for other options
        """
        # alternate spellings are accepted for these two keywords
        if sequence is None:
            sequence = kwa.pop("sequences", None)
        if oligos is None:
            oligos = kwa.pop("oligo", None)

        if sequence:
            # hairpin fit attributes are extracted from the remaining keywords
            fitkeys = {
                'fit', 'constraints', 'match', 'pullphaseratio',
                'singlestrand', 'baseline'
            } & set(kwa)
            fitargs = {name: kwa.pop(name) for name in fitkeys}
            hairpin = Tasks.fittohairpin(
                sequences=sequence,
                oligos='kmer' if oligos is None else oligos,
                **fitargs)
            tasks = tasks + (hairpin, )

        tracks = self._dictview()
        reference = self._reference
        if reference is None:
            return tracks.dataframe(Tasks.peakselector,
                                    *tasks,
                                    transform=transform,
                                    assign=assign,
                                    **kwa)

        hasreftask = any(Tasks(i) == Tasks.fittoreference for i in tasks)
        if not hasreftask:
            pks = self._items[reference].peaks
            if self._beads:
                pks = pks[list(self._beads)]

            hairpinlast = bool(tasks) and isinstance(
                tasks[-1], Tasks.fittohairpin().__class__)
            reftask = Tasks.fittoreference(peaks=pks)
            if hairpinlast:
                # the fit to the reference is inserted prior to the hairpin fit
                tasks = tasks[:-1] + (reftask, tasks[-1])
            else:
                tasks = tasks + (reftask, )

        # the reference track is removed from the tracks
        if reference in tracks:
            tracks = tracks[f'~{reference}']

        return tracks.dataframe(Tasks.peakselector,
                                *tasks,
                                transform=transform,
                                assign=assign,
                                **kwa)
 def _ssitems(self):
     """
     Return the items, passed through `SingleStrandProcessor.apply` when
     `_singlestrand` is set: `True` selects a default `Tasks.singlestrand()`
     configuration, any other truthy value provides its own configuration.
     """
     if not self._singlestrand:
         return self._items

     task = self._singlestrand
     if task is True:
         task = Tasks.singlestrand()
     return SingleStrandProcessor.apply(self._items[...], **task.config())
Example #5
0
class BeadSubtractionDescriptor:
    "A descriptor for adding subtracted beads"
    # key under which the task is stored in the instance's `tasks` mapping
    NAME    = Tasks(BeadSubtractionTask).name
    # the documentation displayed is the task's
    __doc__ = BeadSubtractionTask.__doc__

    def __get__(
            self, inst, owner
    ) -> Union['BeadSubtractionDescriptor', Optional[BeadSubtractionTask]]:
        # class access returns the descriptor, instance access returns the
        # stored task, or None when there is none
        return self if inst is None else inst.tasks.get(self.NAME, None)

    def __delete__(self, inst):
        inst.tasks.pop(self.NAME, None)

    @staticmethod
    def _isempty(beads) -> bool:
        "whether the value provided means that the task must be removed"
        if beads is None:
            return True
        if isinstance(beads, (int, np.integer)) and not isinstance(beads, bool):
            # a single bead id: `0` is a valid id, not an empty value
            return False
        if isinstance(beads, np.ndarray):
            # the truth value of an array with multiple elements is ambiguous
            return beads.size == 0
        return not beads

    def __set__(self, inst,
                beads: Union[None, Dict[str,Any], BeadSubtractionTask, Sequence[int]]):
        """
        Sets or removes the task:

        * an empty value (`None`, `False`, an empty container) removes it,
        * a dictionary is used as the keywords for creating a new task,
        * a task is stored as is,
        * a bead id or a sequence of bead ids is set as the `beads` attribute
          of the current task, the latter being created if missing.
        """
        if self._isempty(beads):
            inst.tasks.pop(self.NAME, None)
        elif isinstance(beads, dict):
            inst.tasks[self.NAME] = BeadSubtractionTask(**beads)
        elif isinstance(beads, BeadSubtractionTask):
            inst.tasks[self.NAME] = beads
        else:
            lst = ([cast(int, beads)] if np.isscalar(beads) else
                   cast(Sequence[int], beads))
            inst.tasks.setdefault(self.NAME, BeadSubtractionTask()).beads = list(lst)
Example #6
0
    def _setupref(self):
        """
        Return a copy of the reference task with its fit data completed.

        The beads considered are those listed by `self._base()`, with peaks
        in the reference track and without fit data yet. Their fit data is
        computed through `self._frompeaksfcn`, using a process pool when
        there are more than 2 beads. `self._reftask` itself is left untouched.
        """
        reftask = cast(FitToReferenceTask, deepcopy(self._reftask))
        beads = set(self._base()[1]['bead'])
        beads &= set(self._items[self._reference].peaks.keys())
        beads -= set(self._reftask.fitdata)

        data = dict(reftask.fitdata)

        # `True` and `None` select the default single strand task, other
        # falsy values disable it
        sstrand = (Tasks.singlestrand() if self._singlestrand in (True, None)
                   else None if not self._singlestrand else self._singlestrand)

        ref = self._items[self._reference].peaks
        args = [(sstrand, reftask.fitalg, ref, i) for i in beads]
        if len(beads) > 2:
            with ProcessPoolExecutor() as pool:
                found = list(pool.map(self._frompeaksfcn, args))
        else:
            # too few beads to warrant a pool: the results must be stored
            # just as for the pool, they used to be discarded
            found = [self._frompeaksfcn(i) for i in args]

        data.update({i: j for i, j in found if j is not None})

        reftask.fitdata = data
        return reftask
Example #7
0
def __cleaning__(cls):
    """
    Return the cleaning tasks: the "cleaning" item of the scripting model
    or, when the latter is `None`, `Tasks.__base_cleaning__()`.

    `Tasks.alignment` is discarded when the "alignalways" item is set.
    """
    tasks = cls.scriptingmodel("cleaning")
    if tasks is None:
        tasks = Tasks.__base_cleaning__()

    if not cls.scriptingmodel("alignalways"):
        return tasks

    # Remove alignment as it is not an optional task.
    # It will be added back in __tasklist__
    return tuple(filter(lambda itm: itm is not Tasks.alignment, tasks))
Example #8
0
def _fit(self, tpe, sequence, oligos, kwa):
    """
    Computes hairpin fits.

    The task is created from the `Tasks` attribute named `tpe`, using `kwa`
    as keywords. The latter is updated in place with `sequence` and `oligos`
    unless these are `None`.

    An `IndexError` is raised when the task has no fit and its oligos are
    not one of the k-mer keywords.
    """
    for name, value in (('sequences', sequence), ('oligos', oligos)):
        if value is not None:
            kwa[name] = value

    task  = getattr(Tasks, tpe)(**kwa)
    kmers = ['3mer', 'kmer', '4mer', '5mer']
    if not (task.fit or task.oligos in kmers):
        raise IndexError('No fit found')

    previous = Tasks.defaulttasklist(self, Tasks.peakselector)
    return self.apply(*previous, task)
Example #9
0
    def getredim(self):
        """
        Returns the method used by the dynamic map

        The parent's values are adapted as follows: a dictionary becomes a
        list of items, the 'key' item is discarded for the '2d' format and,
        without a format but with a fit, the stretch and bias ranges from the
        scripting model are added.
        """
        dims = super().getredim()
        if isinstance(dims, dict):
            dims = list(dims.items())

        if self._format == '2d':
            dims = [itm for itm in dims if itm[0] != 'key']
        elif self._format is None and self._fit:
            ranges = Tasks.scriptingmodel("fittoreferencerange")  # type: ignore
            dims += [(name, slice(*ranges[name]))
                     for name in ('stretch', 'bias')]
        return dims
Example #10
0
 def dataframe(
         self,
         track:    Union[Track, Beads],
         beadlist: Optional[List[int]] = None,
         **kwa
 ) -> pd.DataFrame:
     """
     Return a dataframe containing all info.

     The dataframe is created by `RampDataFrameProcessor.dataframe`, using
     `self.dataframetask` updated with `**kwa` as keywords. The list of
     tasks describing the computation is stored in its `tasklist`
     attribute, the latter being registered in the dataframe's `_metadata`.
     """
     beads   = self.__beads(track, beadlist)
     options = dict(self.dataframetask, **kwa)
     frame   = RampDataFrameProcessor.dataframe(beads, **options)

     instrument = track.instrument['type']
     frame.__dict__['tasklist'] = [
         Tasks.trackreader(path = track.path),
         Tasks.rampcleaning(**self.rampcleaning, instrument = instrument),
         Tasks.alignment(**self.rampcleaning, instrument = instrument),
         RampStatsTask(**options)
     ]

     meta = getattr(frame, '_metadata')
     if 'tasklist' not in meta:
         meta.append('tasklist')
     return frame
Example #11
0
def fittoreference(self,
                   task: FitToReferenceTask = None,
                   **kwa) -> FitToReferenceDict:
    """
    Computes fits to a reference.

    Either a `FitToReferenceTask` is provided or the keywords for creating
    one. Providing both raises a `NotImplementedError`.
    """
    if task is not None and kwa:
        raise NotImplementedError()

    previous = Tasks.defaulttasklist(self, Tasks.peakselector)
    if not isinstance(task, FitToReferenceTask):
        task = FitToReferenceTask(**kwa)
    return self.apply(*previous, task)
Example #12
0
 def rescaletobead(self, bead) -> Track:
     """
     Return a shallow copy of the track, rescaled to the provided bead.

     The rescaled tasks are provided by the tasks model's `rescale` method.
     Those for the track's instrument replace existing attributes in the
     copy's tasks, unless they have no z-scaled attributes. Without an
     entry for the instrument, the 'rescaling' information is removed.
     """
     self._trk.load()
     copied   = shallowcopy(self._trk)
     rescaled = getattr(Tasks.tasksmodel(), 'rescale')(copied, bead)
     instr    = copied.instrument['type'].value
     if instr not in rescaled:
         copied.instrument.pop('rescaling', None)
         return copied

     # reversed conversion table, from `Tasks._cnv` values to its keys
     names = {val: key for key, val in getattr(Tasks, '_cnv')(None).items()}
     for key, task in rescaled[instr].items():
         if task.zscaledattributes() == ():
             continue

         attr = names.get(key, key)
         if hasattr(copied.tasks, attr):
             setattr(copied.tasks, attr, task)

     copied.instrument['rescaling'] = rescaled['rescaling'][instr]
     return copied
Example #13
0
    def _default_kargs(self, key, bead, kwa):
        """
        Complete the keywords with the reference task.

        Nothing is added for the reference track itself or without a
        reference. The reference task's fit data is computed for the bead
        when missing, from the reference track's peaks. The latter first go
        through `SingleStrandProcessor.apply` unless `_singlestrand` is `False`.
        """
        super()._default_kargs(key, bead, kwa)

        reference = self._reference
        if reference is None or reference == key:
            return

        reftask = self._reftask
        if reftask is None:
            return

        kwa['reftask'] = reftask
        if bead in reftask:
            return

        pks = self._items[reference].peaks
        if self._singlestrand is not False:
            sstrand = (self._singlestrand
                       if isinstance(self._singlestrand, SingleStrandTask)
                       else Tasks.singlestrand())
            pks = SingleStrandProcessor.apply(pks[...], **sstrand.config())

        reftask.frompeaks(pks[bead, ...])
Example #14
0
    def __getitem__(self, key):  # pylint: disable=too-many-return-statements
        """
        Return a track or a selection of tracks, depending on the key's type.

        The tests below are performed in order, the first match being used:

        * a non-empty list of integers: a clone where every track's
          `tasks.selection` is a selection task over these integers,
        * any other list or an ellipsis: the parent's behavior,
        * a task or a tuple of tasks: the result of `self.apply`,
        * any other tuple: its items are used one after the other as keys,
          ellipses being discarded,
        * an object with a callable `match` attribute, or 'clean' / '~clean'
          unless they are keys: the result of `self.select`,
        * a missing key which, once stripped of its 'w' and 's', is an
          integer: the result of `self.select`,
        * otherwise the parent's item, with `self.tasks` applied if any.
        """
        # a list of integers: the same selection task is set on all tracks
        if isinstance(key, list) and key and all(
                isinstance(i, int) for i in key):
            tracks = self.clone()
            sel = Tasks.selection(selected=list(key))
            for i in tracks.values():
                i.tasks.selection = sel
            return tracks

        # other lists (including empty ones) and ellipses are left to the parent
        if isinstance(key, list) or isellipsis(key):
            return super().__getitem__(key)

        if isinstance(key, (Task, Tasks)):
            return self.apply(key)

        if isinstance(key, tuple) and all(
                isinstance(i, (Task, Tasks)) for i in key):
            return self.apply(*key)

        # a tuple of keys: the latter are applied successively
        if isinstance(key, tuple):
            tracks = self.clone()
            for i in key:
                if not isellipsis(i):
                    tracks = tracks[i]
            return tracks

        # presumably a compiled regular expression, or the cleaning keywords
        # unless a track actually bears such a key
        if (callable(getattr(key, 'match', None))
                or (key in ('clean', '~clean') and key not in self)):
            return self.select(key)

        # NOTE(review): `key.lower` implies that a missing key is expected to
        # be a string at this point -- confirm
        if key not in self and ('w' in key.lower() or 's' in key.lower()):
            try:
                # only keys made of an integer and the letters 'w' and 's'
                # are forwarded to `select`
                int(key.lower().replace('w', '').replace('s', ''))
            except ValueError:
                pass
            else:
                return self.select(key)

        trk = super().__getitem__(key)
        return trk.apply(*self.tasks) if self.tasks else trk
Example #15
0
def showfalsepositives(itms, rng, precision=1, scatter=False, **kwa):
    """
    Display false positives.

    Parameters
    ----------
    itms:
        The items provided to `FalsePositivesIdentifier.falsepositives`
    rng:
        The range displayed; its first 2 items are used as its bounds
    precision:
        The precision provided to `FalsePositivesIdentifier.detailed`
    scatter:
        Whether to add a scatter plot of z values per bead, with markers
        at both bounds of `rng`
    **kwa:
        The keywords for creating the peak selection task

    Returns
    -------
    A dynamic map over the tracks with false positives
    """
    cls = FalsePositivesIdentifier
    fpos = cls.falsepositives(itms)
    tracks = sorted(fpos.track.unique())
    dico = PeaksDict(config=cast(PeakSelectorTask, Tasks.peakselector(**kwa)))

    def _showfp(track):
        data = fpos[fpos.track == track]
        beads = data.bead.unique()
        dtl = cls.detailed(dico.config, data, precision=precision)
        disp = getattr(Detailed(dico, dtl), 'display')(zero=False).display()
        crv = hv.Curve((list(rng), [3, 3])).options(linewidth=20, alpha=.5)
        ovr = hv.Overlay(list(disp) + [crv])
        if not scatter:
            return ovr

        # one marker per bead at each bound of the range: the upper bound
        # used to be a duplicate of the lower one
        bounds = [rng[0]] * len(beads) + [rng[1]] * len(beads)
        scatt = (hv.Scatter(data, 'bead', 'z').options(jitter=.8) *
                 hv.Scatter((np.concatenate([beads] * 2), bounds)))
        return (ovr + scatt).cols(1)

    return hv.DynamicMap(_showfp, kdims=['track']).redim.values(track=tracks)
Example #16
0
    def subtraction(self, beads = None, **kwa) -> Optional[Beads]:
        """
        Displays aligned cycles for subtracted beads only.

        The beads default to those of the track's subtraction task or,
        without such a task, to the output of `self.track.cleaning.fixed`.
        `None` is returned when there are no beads. The subtracted signal is
        stored as bead -1.
        """
        task = self.track.tasks.subtraction
        if beads is None and task is None:
            beads = self.track.cleaning.fixed(**kwa)

        if beads is None:
            beads = getattr(task, 'beads', None)
            if not beads:
                return None
            config = task.config()
        elif task is not None:
            # the current task's configuration, with the beads provided
            config          = task.config()
            config['beads'] = beads
        else:
            config = cast(Task, Tasks.subtraction(beads = beads)).config()
        config.update(**kwa)

        processor = Tasks.subtraction.processor(**config)
        data      = {}
        for bead in beads:
            data[bead] = self.track.data[bead]
        data[-1] = processor.signal(self.track.beads)  # type: ignore
        return self.track.apply(Tasks.alignment).withdata(data)
Example #17
0
    def process(self,
                beads: Sequence[int] = None,
                **kwa) -> Dict[int, Optional[DataCleaningErrorMessage]]:
        """
        Returns a dictionary of cleaning results.

        The track's subtraction task is applied first, if any. The cleaning
        configuration is that of the track's cleaning task, or of a default
        one for the track's instrument, updated with `**kwa`. Values are
        `None` or the first argument of the object returned by
        `DataCleaningProcessor.compute`.
        """
        def _message(err):
            "returns `None` or the first argument"
            return None if err is None else err.args[0]

        items       = self.track.beads
        subtraction = self.track.tasks.subtraction  # type: ignore
        if subtraction is not None:
            items = BeadSubtractionProcessor.apply(items, cache = {}, **subtraction.config())
        if beads:
            items = items[list(beads)]

        task = self.track.tasks.cleaning  # type: ignore
        if task is None:
            task = Tasks.cleaning(instrument = self.track.instrument['type'])

        # use the default settings for this track
        config = task.config()
        config.update(kwa)

        output = {}
        for info in cast(Iterator, items):
            key         = info[0]
            output[key] = _message(DataCleaningProcessor.compute(items, info, **config))
        return output
Example #18
0
    def getredim(self):
        """
        Returns the method used by the dynamic map

        To the values from `_PeaksDisplay.getredim` are added the sorted
        sequence names, then the stretch and bias. The latter are the
        instance's values when they differ from the class defaults, and
        otherwise the ranges from the scripting model, adapted to the
        track's 'rescaling' coefficient if there is one.
        """
        dims = list(_PeaksDisplay.getredim(self))

        # values set by the user, i.e. differing from the class' defaults
        fixed = []
        for name in ('stretch', 'bias'):
            current = getattr(self, '_' + name)
            if current != getattr(self.__class__, '_' + name):
                fixed.append((name, current))

        ranges     = Tasks.scriptingmodel("fittohairpinrange")  # type: ignore
        track      = getattr(self._items, 'track', None)
        if 'rescaling' in getattr(track, 'instrument', ()):
            coeff  = float(self._items.track.instrument['rescaling'])
            ranges = {
                'stretch': tuple(val / coeff for val in ranges['stretch']),
                'bias': tuple(val * coeff for val in ranges['bias'])
            }

        pins = sequences.peaks(self._sequence, self._oligos)
        if isinstance(pins, np.ndarray):
            pins = {'hairpin 1': None}

        dims.append(('sequence', sorted(dict(pins))))
        dims.extend(fixed)

        fixednames = {name for name, _ in fixed}
        for name in ('stretch', 'bias'):
            if name not in fixednames:
                dims.append((name, slice(*ranges[name])))
        return dims
Example #19
0
def peaks(self) -> PeaksDict:
    "applies the task list from `Tasks.defaulttasklist` for `Tasks.peakselector`"
    tasks = Tasks.defaulttasklist(self, Tasks.peakselector)
    return self.apply(*tasks)
Example #20
0
def events(self) -> Events:
    """
    Returns events in phase 5 only, by applying the task list from
    `Tasks.defaulttasklist` for `Tasks.eventdetection`
    """
    tasks = Tasks.defaulttasklist(self, Tasks.eventdetection)
    return self.apply(*tasks)
Example #21
0
 def cleanbeads(self) -> Beads:
     """
     Return cleaned beads, by applying the task list from
     `Tasks.defaulttasklist` for `Tasks.alignment`
     """
     tasks   = Tasks.defaulttasklist(self, Tasks.alignment)
     cleaned = self.apply(*tasks)
     return cast(Beads, cleaned)
Example #22
0
 def processors(self, *args, copy=True):
     """
     Returns an iterator over the result of provided tasks.

     The processors are created by `Tasks.processors`. This instance is set
     as the default cache of their first item and `copy` as their `copy`
     attribute.
     """
     created = Tasks.processors(self, *args)
     created.data.setcachedefault(0, self)
     created.copy = copy
     return created
Example #23
0
 def tasklist(self, *args):
     "creates a tasklist from this instance's path and the arguments provided"
     created = Tasks.tasklist(self.path, *args)
     return created