Ejemplo n.º 1
0
    def _align(data, method='DAPI', upsample_factor=2, window=4):
        """Align channels within every cycle, then align the cycles to each other.

        `data` must be convertible to a 4-D array with dimensions
        (CYCLE, CHANNEL, I, J). Channels 1 onward of each cycle are passed
        through `Align.align_within_cycle`; channel 0 is left as is at that
        stage.

        `method` selects how cycles are registered to one another:

        * 'DAPI' -- delegate to `Align.align_between_cycles` with
          `channel_index=0`.
        * 'SBS_mean' -- compute offsets with `Align.calculate_offsets` from
          the windowed, percentile-normalized maximum over channels 1 onward,
          then apply those offsets to every channel.
        * anything else -- no between-cycle alignment is performed.

        `window` and `upsample_factor` are forwarded unchanged to the `Align`
        helpers.

        Returns the aligned data. Raises AssertionError if the input is not
        4-dimensional.
        """
        stack = np.array(data)
        assert stack.ndim == 4, 'Input data must have dimensions CYCLE, CHANNEL, I, J'

        def within_cycle(cycle_channels):
            return Align.align_within_cycle(
                cycle_channels, window=window, upsample_factor=upsample_factor)

        # register channels 1.. against each other, one cycle at a time
        result = stack.copy()
        per_cycle = []
        for cycle_channels in result[:, 1:]:
            per_cycle.append(within_cycle(cycle_channels))
        result[:, 1:] = np.array(per_cycle)

        if method == 'DAPI':
            # register cycles on channel 0
            return Align.align_between_cycles(
                result,
                channel_index=0,
                window=window,
                upsample_factor=upsample_factor)

        if method == 'SBS_mean':
            # offsets come from the maximum (axis 1) of the windowed channels 1..
            windowed = Align.apply_window(result[:, 1:], window=window)
            target = windowed.max(axis=1)
            normed = Align.normalize_by_percentile(target)
            offsets = Align.calculate_offsets(
                normed, upsample_factor=upsample_factor)
            # the same offsets are applied to every channel, channel 0 included
            for channel_index in range(result.shape[1]):
                shifted = Align.apply_offsets(result[:, channel_index], offsets)
                result[:, channel_index] = shifted

        return result
Ejemplo n.º 2
0
    def _align_SBS(data,
                   method='DAPI',
                   upsample_factor=2,
                   window=2,
                   cutoff=1,
                   align_within_cycle=True,
                   keep_trailing=False,
                   n=1):
        """Rigid alignment of sequencing cycles and channels.

        Expects `data` to be an array (or nested sequence) with dimensions
        (CYCLE, CHANNEL, I, J). A centered subset of data is used if `window`
        is greater than one. Subpixel alignment is done if `upsample_factor`
        is greater than one (can be slow).

        Parameters
        ----------
        data : array-like
            Image data indexed as (CYCLE, CHANNEL, I, J). When `keep_trailing`
            is true the cycles may have different channel counts.
        method : str
            'DAPI' aligns cycles with `Align.align_between_cycles` on channel
            0. 'SBS_mean' computes cycle offsets from channels 1 onward and
            applies them to every channel. Any other value skips the
            between-cycle alignment.
        upsample_factor, window
            Forwarded to the `Align` helpers.
        cutoff : number
            With 'SBS_mean', normalized target values above `cutoff` are
            clipped to `cutoff` before offsets are calculated.
        align_within_cycle : bool
            If true, channels `n:` of each cycle are passed through
            `Align.align_within_cycle` first.
        keep_trailing : bool
            If true, every cycle is trimmed to its last `k` channels, where
            `k` is the smallest channel count found in any cycle.
        n : int
            Index of the first channel included in within-cycle alignment.

        Returns
        -------
        The aligned data.

        Raises
        ------
        AssertionError
            If the (trimmed) data is not 4-dimensional.
        """
        if keep_trailing:
            # Trim on the raw sequence, before the array conversion: cycles
            # with different channel counts form a ragged sequence, which
            # np.array refuses to convert on recent NumPy releases.
            valid_channels = min(len(x) for x in data)
            data = [x[-valid_channels:] for x in data]
        data = np.array(data)

        assert data.ndim == 4, 'Input data must have dimensions CYCLE, CHANNEL, I, J'

        # align between SBS channels for each cycle
        aligned = data.copy()
        if align_within_cycle:
            aligned[:, n:] = np.array([
                Align.align_within_cycle(
                    x, window=window, upsample_factor=upsample_factor)
                for x in aligned[:, n:]
            ])

        if method == 'DAPI':
            # align cycles using the DAPI channel
            aligned = Align.align_between_cycles(
                aligned,
                channel_index=0,
                window=window,
                upsample_factor=upsample_factor)
        elif method == 'SBS_mean':
            # calculate cycle offsets from the maximum (over axis 1) of the
            # windowed channels 1..; despite the method name this is a max,
            # not a mean
            target = Align.apply_window(aligned[:, 1:],
                                        window=window).max(axis=1)
            normed = Align.normalize_by_percentile(target)
            # clip bright values so they do not dominate the offset estimate
            normed[normed > cutoff] = cutoff
            offsets = Align.calculate_offsets(normed,
                                              upsample_factor=upsample_factor)
            # apply cycle offsets to each channel
            for channel in range(aligned.shape[1]):
                aligned[:, channel] = Align.apply_offsets(
                    aligned[:, channel], offsets)

        return aligned
Ejemplo n.º 3
0
    def _align_SBS(data, method='DAPI', upsample_factor=2, window=2, cutoff=1, q_norm=70,
        align_within_cycle=True, cycle_files=None, keep_trailing=False, n=1, remove_for_cycle_alignment=None, rescale_channels=None, rescale_factors=None):
        """Rigid alignment of sequencing cycles and channels.

        Expects `data` to be an array with dimensions (CYCLE, CHANNEL, I, J).
        `n` determines the first SBS channel in `data`. A centered subset of
        data is used if `window` is greater than one. Subpixel alignment is
        done if `upsample_factor` is greater than one (can be slow).

        Parameters
        ----------
        data : array-like
            Image data indexed as (CYCLE, CHANNEL, I, J). Cycles may have
            different channel counts; see `keep_trailing`.
        method : str
            'DAPI' aligns cycles with `Align.align_between_cycles` on channel
            0. 'SBS_mean' computes cycle offsets from the SBS channels and
            applies them to the channels. Any other value skips the
            between-cycle alignment.
        upsample_factor, window
            Forwarded to the `Align` helpers.
        cutoff : number
            With 'SBS_mean', normalized target values above `cutoff` are
            clipped to `cutoff` before offsets are calculated.
        q_norm : number
            Forwarded to `Align.normalize_by_percentile`.
        align_within_cycle : bool
            If true, channels `n:` of each cycle are passed through
            `Align.align_within_cycle` first.
        cycle_files : sequence of int, optional
            If given, `data` is rebuilt by indexing it with `cycle - 1` for
            each entry (see the review note in the body).
        keep_trailing : bool or 'propagate_extras'
            If truthy, or if the cycles have differing channel counts, every
            cycle is trimmed to its last `k` channels, `k` being the smallest
            channel count. With 'propagate_extras' the leading channels that
            were trimmed away are copied to every cycle and prepended on the
            channel axis.
        n : int
            Index of the first SBS channel.
        remove_for_cycle_alignment : int, optional
            Channel index excluded when building the 'SBS_mean' target.
        rescale_channels, rescale_factors : sequence, optional
            Channel indices (e.g. Cy3=1, A594=2) and the matching factors by
            which those channels are multiplied, in every cycle, before
            alignment.

        Returns
        -------
        The aligned data.

        Raises
        ------
        AssertionError
            If the stacked data is not 4-dimensional.
        """

        def _stack(cycles):
            # Build the cycle array; cycles with differing channel counts
            # cannot form a regular array, so fall back to a 1-D object array
            # holding one (copied) array per cycle.
            try:
                return np.array(cycles)
            except ValueError:
                ragged = np.empty(len(cycles), dtype=object)
                for index, cycle_data in enumerate(cycles):
                    ragged[index] = np.array(cycle_data)
                return ragged

        if cycle_files is not None:
            arr = []
            # snakemake passes de-nested list of numpy arrays
            # NOTE(review): the entry for cycle 1 is appended twice, and
            # `data` is only converted when an entry equals
            # len(cycle_files) -- behavior kept as found; confirm intent.
            for cycle in cycle_files:
                if cycle == 1:
                    arr.append(np.array(data[cycle - 1]))
                arr.append(np.array(data[cycle - 1]))
                if cycle == len(cycle_files):
                    data = _stack(arr)
        else:
            data = _stack(data)

        if rescale_channels is not None:
            # rebalance the requested channels in every cycle that is present
            for cycle_data in data:
                for i, channel in enumerate(rescale_channels):
                    cycle_data[channel] = cycle_data[channel] * rescale_factors[i]

        # data.ndim == 1 means the cycles could not be stacked into a regular
        # array, i.e. their channel counts differ
        if keep_trailing or data.ndim == 1:
            channels = [len(x) for x in data]
            fewest = min(channels)
            stacked = np.array([x[-fewest:] for x in data])
        else:
            stacked = data

        if keep_trailing == 'propagate_extras':
            # number of leading channels each cycle has beyond the minimum
            extras = np.array(channels) - min(channels)
            leading = []
            for cycle, extra in enumerate(extras):
                leading.extend(data[cycle][extra_ch] for extra_ch in range(extra))
            if leading:
                # copy the extra channels to every cycle, ahead of the
                # channels all cycles have in common
                propagate = np.array(leading)
                stacked = np.concatenate(
                    (np.array([propagate] * stacked.shape[0]), stacked), axis=1)
        else:
            extras = [0] * stacked.shape[0]

        assert stacked.ndim == 4, 'Input data must have dimensions CYCLE, CHANNEL, I, J'

        # align between SBS channels for each cycle
        aligned = stacked.copy()
        if align_within_cycle:
            aligned[:, n:] = np.array([
                Align.align_within_cycle(x, window=window, upsample_factor=upsample_factor)
                for x in aligned[:, n:]
            ])

        if method == 'DAPI':
            # align cycles using the DAPI channel
            aligned = Align.align_between_cycles(aligned, channel_index=0,
                                window=window, upsample_factor=upsample_factor)
        elif method == 'SBS_mean':
            # calculate cycle offsets from the maximum (over axis 1) of the
            # windowed SBS channels; despite the method name this is a max
            sbs_channels = list(range(n, aligned.shape[1]))
            if remove_for_cycle_alignment is not None:
                sbs_channels.remove(remove_for_cycle_alignment)

            target = Align.apply_window(aligned[:, sbs_channels], window=window).max(axis=1)
            normed = Align.normalize_by_percentile(target, q_norm=q_norm)
            normed[normed > cutoff] = cutoff
            offsets = Align.calculate_offsets(normed, upsample_factor=upsample_factor)

            # apply cycle offsets to each channel
            n_cycles = aligned.shape[0]
            n_extra = sum(extras)
            extras_seen = np.cumsum(extras)
            for channel in range(aligned.shape[1]):
                if channel >= n_extra:
                    aligned[:, channel] = Align.apply_offsets(aligned[:, channel], offsets)
                else:
                    # don't apply offsets to extra channel in the cycle it was
                    # acquired: the source cycle is the first one whose running
                    # count of extras exceeds this channel index
                    source_cycle = int(np.argmax(extras_seen > channel))
                    offset_cycles = [c for c in range(n_cycles) if c != source_cycle]
                    # assumes `offsets` holds one entry per cycle, in cycle
                    # order -- TODO confirm against Align.calculate_offsets
                    aligned[offset_cycles, channel] = Align.apply_offsets(
                        aligned[offset_cycles, channel],
                        np.asarray(offsets)[offset_cycles])

        return aligned