Example #1
import numpy as np
from numpy.random import permutation as randperm


def balance_sampl(data_labels):
    # Gather the sample indices and the sample count for each label.
    # `out_dict_size` is assumed to be a module-level constant giving the
    # number of distinct labels.
    labels_ind = []
    labels_count = []
    for label in range(out_dict_size):
        labels_ind.append(np.argwhere(data_labels == label).ravel())
        labels_count.append(np.sum(data_labels == label))

    #count_order = np.argsort(labels_count)
    count_mean = np.mean(labels_count)
    count_std = np.std(labels_count)

    up_bound = count_mean + count_std
    low_bound = count_mean - count_std
    if low_bound <= 0:
        low_bound = count_mean

    ind_out_temp = []
    for ind_list, count in zip(labels_ind, labels_count):
        if count > up_bound:
            # Over-represented class: shuffle and keep only `up_bound` samples.
            ind_list = randperm(ind_list)
            ind_out_temp.append(ind_list[:int(up_bound)])

        elif count < low_bound and count > 0:
            # Under-represented class: repeatedly double the index list
            # (at most 3 doublings, i.e. at most 8x) to bring its count
            # up towards `low_bound`.
            rep_count = min(int(np.ceil(low_bound / float(count))), 3)
            for rep in range(rep_count):
                ind_list = np.hstack((ind_list, ind_list))

            ind_out_temp.append(ind_list)

        else:
            ind_out_temp.append(ind_list)

    # Concatenate the per-class index lists and shuffle the result.
    ind_out = np.array([])
    for ind_list in ind_out_temp:
        ind_out = np.hstack((ind_out, ind_list))

    ind_out = [int(numb) for numb in ind_out]
    return randperm(ind_out)
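
A quick usage sketch with made-up data; it assumes `out_dict_size` is defined as a module-level constant in the same module as `balance_sampl`, as the function expects:

import numpy as np

out_dict_size = 3  # assumed module-level class count used by balance_sampl
data_labels = np.array([0] * 100 + [1] * 10 + [2] * 40)

balanced_idx = balance_sampl(data_labels)
# Per-class counts after resampling are pulled towards the mean +/- std band.
print(np.bincount(data_labels[balanced_idx], minlength=out_dict_size))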
Example #2
def balance_classes(y):
    import numpy as np
    from numpy.random import permutation as randperm
    from numpy import random

    random.seed(1)
    # For every class with fewer samples than the largest class, pick a random
    # subset of its indices to repeat.
    yuniq = np.unique(y)
    ycounts = [np.sum(y == yi) for yi in yuniq]
    ymax = np.max(ycounts)
    balidx = np.array([], int)
    for i,yi in enumerate(yuniq):
        yidx = np.where(y==yi)[0]
        if ycounts[i] < ymax:
            nadd = ymax-ycounts[i]
            perm = np.asarray(randperm(ycounts[i]),int)
            balidx = np.r_[balidx,yidx[perm[:nadd]]]
    return balidx
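
Note that the function returns only the indices of the extra samples to append, and each minority sample is repeated at most once per call. A minimal sketch of how the result might be used (the label vector here is made up):

import numpy as np

y = np.array([0] * 6 + [1] * 2)   # class 1 is under-represented
extra = balance_classes(y)        # indices of class-1 samples to repeat
y_balanced = np.r_[y, y[extra]]   # counts become 6 and 4 instead of 6 and 2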
Example #3
from numpy.random import permutation as randperm


def corrupt_img(img, mode):
    # Fraction of pixels to zero out in each row, per channel.
    if mode == 'A':
        ratio = 0.8
    elif mode == 'B':
        ratio = 0.4
    elif mode == 'C':
        ratio = 0.6
    else:
        raise ValueError('mode %r not implemented' % mode)

    rows, cols, channels = img.shape
    corr_img = img.copy()
    # For every row, zero out a fixed fraction of the pixels.
    sub_noise_num = int(round(ratio * cols))
    # For every channel, randomly choose which columns to zero in each row.
    for k in range(channels):
        for i in range(rows):
            tmp = randperm(cols)
            noise_idx = tmp[:sub_noise_num]
            corr_img[i, noise_idx, k] = 0
    return corr_img
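
A small sketch of how this might be called (the image here is synthetic):

import numpy as np

img = np.ones((8, 10, 3), dtype=np.float32)   # hypothetical 8x10 RGB image
corrupted = corrupt_img(img, mode='B')        # zero out ~40% of each row
print((corrupted == 0).mean())                # roughly 0.4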
Example #4
from typing import Tuple, Union

import numpy as np
from numpy import ndarray, random
from numpy.random import permutation as randperm


def surrogate_calc(time_series: Union[TimeSeries,
                                      ndarray], N: int, method: str, pp: bool,
                   fs: float) -> Tuple[ndarray, "Params"]:
    """
    Calculates surrogates.

    :param time_series: the original signal as a TimeSeries
    :param N: the number of surrogates
    :param method: the required surrogate type
    :param pp: whether to perform preprocessing
    :param fs: the sampling frequency
    :return: the surrogate signal(s) and params
    """
    if isinstance(time_series, TimeSeries):
        sig = time_series.signal
    else:
        sig = time_series

    surr = np.empty((N, len(sig)), dtype=np.float64)  # TODO: check this

    params = Params()
    origsig = sig
    params.origsig = origsig
    params.method = method
    params.numsurr = N
    params.fs = fs

    if pp:
        sig, time, ks, ke = preprocessing(sig, fs)
        params.preprocessing = True
        params.cutsig = sig
        params.sigstart = ks
        params.sigend = ke
    else:
        time = np.linspace(0, len(sig) / fs, len(sig))
        params.preprocessing = False

    L = len(sig)
    L2 = int(np.ceil(L / 2))

    params.time = time

    # Random permutation surrogates.
    if method == _RP:
        for k in range(N):
            surr[k, :] = sig[randperm(L)]

    # Fourier transform surrogates.
    elif method == _FT:
        b = 2 * np.pi

        # Note: removed 'eta' parameter from function.
        eta = b * np.random.rand(N, L2 - 1)

        ftsig = np.fft.fft(sig, axis=0)
        ftrp = np.zeros((N, len(ftsig)), dtype=np.complex64)
        ftrp[:, 0] = ftsig[0]

        F = ftsig[1:L2]
        F = np.tile(F, (N, 1))

        ftrp[:, 1:L2] = F * np.exp(1j * eta)
        ftrp[:, 1 + L - L2:L] = np.conj(np.fliplr(ftrp[:, 1:L2]))

        # Each row of ftrp is a full spectrum, so invert along axis 1.
        surr = np.fft.ifft(ftrp, axis=1)
        surr = np.real(surr)

        params.rphases = eta

    # Amplitude-adjusted Fourier transform surrogates.
    elif method == _AAFT:
        b = 2 * np.pi
        eta = b * np.random.rand(N, L2 - 1)

        val = np.sort(sig)
        ind = np.argsort(sig)
        rankind = np.empty(ind.shape, dtype=int)
        rankind[ind] = np.arange(0, L)

        gn = np.sort(np.random.randn(N, len(sig)), 1)
        for j in range(N):
            gn[j, :] = gn[j, rankind]

        # FFT of each reordered Gaussian series (one per row).
        ftgn = np.fft.fft(gn, axis=1)
        F = ftgn[:, 1:L2]

        surr = np.zeros((N, len(sig)), dtype=complex)
        surr[:, 0] = gn[:, 0]
        surr[:, 1:L2] = np.multiply(F, np.exp(1j * eta))
        surr[:, 1 + L - L2:L] = np.conj(np.fliplr(surr[:, 1:L2]))
        surr = np.fft.ifft(surr, axis=1)

        ind2 = np.argsort(surr, axis=1)
        rrank = np.zeros((1, L), dtype=int)
        for k in range(N):
            rrank[:, ind2[k, :]] = np.arange(0, L)
            surr[k, :] = val[rrank]

        surr = np.real(surr)

    # Iterated amplitude-adjusted Fourier transform with exact distribution.
    elif method == _IAFFT1:
        maxit = 1000
        val = np.sort(sig)
        ind = np.argsort(sig)

        rankind = np.empty(ind.shape, dtype=int)
        rankind[ind] = np.arange(0, L)

        ftsig = np.fft.fft(sig, axis=0)
        F = np.tile(ftsig, (N, 1))
        surr = np.zeros((N, L))

        for j in range(N):
            surr[j, :] = sig[randperm(L)]

        it = 1
        irank = rankind.copy()
        irank = np.tile(irank, (N, 1))
        irank2 = np.zeros(L, dtype=int)
        oldrank = np.zeros((N, L))
        iind = np.zeros((N, L), dtype=int)
        iterf = np.zeros((N, L))

        # Iterate until the rank ordering of every surrogate stops changing,
        # or until the maximum number of iterations is reached.
        while np.any(np.max(np.abs(oldrank - irank), axis=1) != 0) and it < maxit:
            go = np.max(np.abs(oldrank - irank), axis=1)

            # Rows (surrogates) which have not yet converged.
            inc = np.flatnonzero(go)

            oldrank = irank.copy()

            # Impose the amplitude spectrum of the original signal while
            # keeping the phases of the current surrogates.
            iterf[inc, :] = np.real(np.fft.ifft(
                np.abs(F[inc, :]) * np.exp(
                    1j * np.angle(np.fft.fft(surr[inc, :], axis=1))),
                axis=1))

            # Restore the exact amplitude distribution by rank ordering.
            iind[inc, :] = np.argsort(iterf[inc, :], axis=1)
            for k in inc:
                irank2[iind[k, :]] = np.arange(0, L)
                irank[k, :] = irank2.copy()
                surr[k, :] = val[irank2]

            it += 1

    # Iterated amplitude-adjusted Fourier transform with exact spectrum.
    elif method == _IAFFT2:
        pass

    # Wavelet iterated amplitude adjusted Fourier transform surrogates
    elif method == _WIAFFT:
        pass

    # Time-shifted surrogates.
    elif method == _tshift:
        for sn in range(N):
            startp = random.randint(1, L - 1)
            surr[sn, :] = np.hstack([sig[startp:L], sig[:startp]])

    # Cycle phase permutation surrogates.
    elif method == _CPP:
        signal = np.mod(sig, 2 * np.pi)

        # Indices where the phase wraps around, i.e. the end of each cycle.
        dcpoints = np.nonzero((signal[1:] - signal[:-1]) < -np.pi)[0]
        NC = len(dcpoints) - 1

        if NC > 0:
            # Complete cycles generally differ in length, so keep them in a list.
            cycles = [signal[dcpoints[k] + 1:dcpoints[k + 1] + 1]
                      for k in range(NC)]

            stcycle = signal[:dcpoints[0] + 1]
            endcycle = signal[dcpoints[NC] + 1:]

            for sn in range(N):
                # Permute the order of the complete cycles, keeping the partial
                # first and last cycles in place, then unwrap the phase.
                perm = np.random.permutation(NC)
                surr[sn, :] = np.unwrap(np.hstack(
                    [stcycle] + [cycles[i] for i in perm] + [endcycle]))

        else:
            for sn in range(N):
                surr[sn, :] = np.unwrap(signal)

    params.type = method
    params.numsurr = N
    if pp:
        params.preprocessing = True
        params.cutsig = sig
        params.sigstart = ks
        params.sigend = ke
    else:
        params.preprocessing = False

    params.time = time
    params.fs = fs

    return surr, params
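
A rough usage sketch, assuming the module-level names referenced above (the method constants such as `_RP`, the `Params` class, and `preprocessing`) are available; the test signal is made up:

import numpy as np

fs = 50.0                                   # sampling frequency (Hz)
t = np.arange(0, 20, 1 / fs)
sig = np.sin(2 * np.pi * 1.5 * t) + 0.1 * np.random.randn(len(t))

# Ten random-permutation surrogates of the raw signal, without preprocessing.
surr, params = surrogate_calc(sig, N=10, method=_RP, pp=False, fs=fs)
print(surr.shape)                           # (10, len(sig))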
Example #5
from numpy import argmin, hstack, ones, tile, zeros
from numpy.linalg import lstsq, matrix_rank
from numpy.random import permutation as randperm


def ba_init(x, y, K):
    """
    Initializes a max-affine fit to the data (y, x);
    ensures that the initialization has at least K+1 points per partition (i.e.
    per affine function)

    INPUTS:
        x:      Independent variable data
                    2D column vector [nPoints x nDims]

        y:      Dependent variable data
                    2D column vector [nPoints x 1]

        K:      Number of affine functions (partitions) to initialize

    OUTPUTS:
        ba:     Initial b and a parameters
                    2D array [(dimx+1) x K]

    """
    defaults = {}
    defaults['bverbose'] = False
    options = defaults

    npt, dimx = x.shape

    X = hstack((ones((npt, 1)), x))
    b = zeros((dimx+1, K))

    if K*(dimx+1) > npt:
        raise ValueError('Not enough data points')

    # Choose K unique indices
    randinds = randperm(npt)[0:K]

    # partition based on distances
    sqdists = zeros((npt, K))
    for k in range(K):
        sqdists[:, k] = ((x - tile(x[randinds[k], :], (npt, 1))) ** 2).sum(1)

    # index to closest k for each data pt
    mindistind = argmin(sqdists, axis=1)

    # loop through each partition, making local fits
    # note we expand partitions that result in singular least squares problems
    # why this way? some points will be shared by multiple partitions, but
    # resulting max-affine fit will tend to be good. (as opposed to solving least-norm version)
    for k in range(K):
        inds = mindistind == k

        # before fitting, check rank and increase partition size if necessary
        # (this does create overlaps)
        if matrix_rank(X[inds, :]) < dimx + 1:
            sortdistind = sqdists[:, k].argsort()

            i = sum(inds)  # number of points in partition
            iinit = i

            if i < dimx+1:
                # obviously, at least need dimx+1 points. fill these in before
                # checking any ranks
                inds[sortdistind[i+1:dimx+1]] = 1  # TODO: check index
                i = dimx+1  # TODO: check index

            # now add points until rank condition satisfied
            while matrix_rank(X[inds, :]) < dimx+1:
                i = i+1
                inds[sortdistind[i]] = 1

            if options['bverbose']:
                print("ba_init: Added %s points to partition %s to maintain "
                      "full rank for local fitting." % (i - iinit, k))
        # now create the local fit
        b[:, k] = lstsq(X[inds.nonzero()], y[inds.nonzero()], rcond=-1)[0][:, 0]
        # rcond=-1 keeps the legacy least-squares cutoff, avoiding the
        # FutureWarning raised by newer NumPy versions.

    return b
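
A minimal sketch of calling ba_init on synthetic data (the convex, piecewise-affine target below is made up for illustration):

import numpy as np

x = np.linspace(-1, 1, 200).reshape(-1, 1)                 # [nPoints x 1]
y = np.maximum(2 * x + 1, -x) + 0.01 * np.random.randn(200, 1)

ba = ba_init(x, y, K=2)
print(ba.shape)   # (dimx + 1, K) == (2, 2)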