def balance_sampl(data_labels):
    """Return shuffled sample indices with class frequencies evened out.

    Labels ``0 .. out_dict_size - 1`` (``out_dict_size`` is a module-level
    name) are counted in ``data_labels``.  With ``mean`` and ``std`` of those
    counts, ``up_bound = mean + std`` and ``low_bound = mean - std`` (replaced
    by ``mean`` when it is not positive):

    * classes with more than ``up_bound`` samples are randomly cut down to
      ``int(up_bound)`` samples;
    * non-empty classes with fewer than ``low_bound`` samples are duplicated;
    * all other classes are kept unchanged.

    :param data_labels: array of integer class labels, compared element-wise
        against each label value
    :return: the selected sample indices (integers), in random order
    """
    labels_ind = []
    labels_count = []
    for label in range(out_dict_size):
        mask = data_labels == label
        labels_ind.append(np.argwhere(mask).ravel())
        labels_count.append(np.sum(mask))

    count_mean = np.mean(labels_count)
    count_std = np.std(labels_count)
    up_bound = count_mean + count_std
    low_bound = count_mean - count_std
    if low_bound <= 0:
        low_bound = count_mean

    ind_out_temp = []
    for ind_list, count in zip(labels_ind, labels_count):
        if count > up_bound:
            # Over-represented: keep a random subset.  The builtin int()
            # replaces np.int, which was removed from NumPy.
            ind_out_temp.append(randperm(ind_list)[:int(up_bound)])
        elif 0 < count < low_bound:
            rep_count = min(int(np.ceil(low_bound / float(count))), 3)
            # NOTE(review): every pass doubles the list, so the class ends up
            # with 2**rep_count copies (4x or 8x) rather than rep_count
            # copies.  Kept as-is to preserve existing sampling; confirm
            # whether linear replication was intended.
            for _ in range(rep_count):
                ind_list = np.hstack((ind_list, ind_list))
            ind_out_temp.append(ind_list)
        else:
            ind_out_temp.append(ind_list)

    # The leading empty integer array keeps hstack valid when there are no
    # classes at all and fixes the result dtype to integer.
    ind_out = np.hstack([np.array([], dtype=int)] + ind_out_temp).astype(int)
    return randperm(ind_out)
def balance_classes(y):
    """Return indices of extra samples that oversample minority classes.

    For every class in ``y`` that has fewer members than the largest class,
    ``max_count - count`` member indices are drawn so that appending
    ``y[result]`` to ``y`` gives every class the same number of samples.
    Indices are drawn from random permutations of the class members; when
    more extra samples are needed than the class has members, further
    permutations are drawn, so members repeat as evenly as possible.

    The global NumPy random state is re-seeded with 1 on every call, which
    makes the result deterministic (and affects later draws by callers).

    NOTE: a second definition of ``balance_classes`` appears later in this
    file and replaces this one when the module is imported.

    :param y: array-like of class labels
    :return: integer array of indices into ``y``; empty when ``y`` is empty
        or already balanced
    """
    from numpy.random import permutation as randperm
    from numpy import random

    random.seed(1)
    y = np.asarray(y)
    balidx = np.array([], int)

    members = [np.where(y == label)[0] for label in np.unique(y)]
    if not members:
        # Empty input: nothing to balance (np.max of no counts would raise).
        return balidx

    ymax = max(len(idx) for idx in members)
    for yidx in members:
        count = len(yidx)
        nadd = ymax - count
        if nadd <= 0 or count == 0:
            continue
        # One permutation yields at most `count` indices; draw as many
        # rounds as needed to reach `nadd`.  With nadd <= count this is a
        # single draw, identical to the previous behaviour.
        nrounds = -(-nadd // count)
        perm = np.concatenate([randperm(count) for _ in range(nrounds)])
        balidx = np.r_[balidx, yidx[perm[:nadd]]]
    return balidx
def balance_classes(y):
    """Return indices of extra samples that oversample minority classes.

    Each class in ``y`` smaller than the largest class contributes
    ``max_count - count`` indices of its own members, so that
    ``np.r_[y, y[result]]`` contains every class equally often.  Indices
    come from random permutations of the class; if a class needs more extra
    samples than it has members, additional permutations are drawn and its
    members are repeated.

    The global NumPy random state is re-seeded with 1 on every call, so the
    output is deterministic and later random draws by the caller are
    affected.

    :param y: array-like of class labels
    :return: integer array of indices into ``y``; empty when ``y`` is empty
        or already balanced
    """
    from numpy.random import permutation as randperm
    from numpy import random

    random.seed(1)
    y = np.asarray(y)
    balidx = np.array([], int)

    yuniq = np.unique(y)
    if yuniq.size == 0:
        # np.max over an empty count list would raise; there is nothing to do.
        return balidx

    class_idx = [np.where(y == yi)[0] for yi in yuniq]
    ycounts = [idx.size for idx in class_idx]
    ymax = max(ycounts)

    for yidx, count in zip(class_idx, ycounts):
        nadd = ymax - count
        if count == 0 or nadd <= 0:
            continue
        picks = np.array([], int)
        # A single permutation can only supply `count` indices, which left
        # small classes under-sampled; keep drawing until `nadd` is reached.
        while picks.size < nadd:
            picks = np.r_[picks, np.asarray(randperm(count), int)]
        balidx = np.r_[balidx, yidx[picks[:nadd]]]
    return balidx
def corrupt_img(img, mode):
    """Return a copy of ``img`` with random pixels of every row set to zero.

    In each row of each channel, ``round(ratio * cols)`` randomly chosen
    column positions are zeroed, where ``ratio`` is selected by ``mode``:
    ``'A'`` -> 0.8, ``'B'`` -> 0.4, ``'C'`` -> 0.6.  The input array is not
    modified.

    :param img: array of shape (rows, cols, channels)
    :param mode: one of ``'A'``, ``'B'``, ``'C'``
    :return: corrupted copy of ``img``
    :raises ValueError: if ``mode`` is not one of the supported values
    """
    if mode == 'A':
        ratio = 0.8
    elif mode == 'B':
        ratio = 0.4
    elif mode == 'C':
        ratio = 0.6
    else:
        # Previously this only printed a message and then failed on the
        # undefined `ratio`; fail with an explicit error instead.
        raise ValueError(
            "corruption mode %r not implemented; expected 'A', 'B' or 'C'"
            % (mode,))

    rows, cols, channels = img.shape
    corr_img = img.copy()

    # Number of pixels to blank in every row.
    sub_noise_num = int(round(ratio * cols))

    for k in range(channels):
        for i in range(rows):
            # Take the first `sub_noise_num` entries of the permutation.
            # The old slice started at 1 (a 1-based indexing leftover) and
            # therefore blanked one pixel fewer than requested.
            noise_idx = randperm(cols)[:sub_noise_num]
            corr_img[i, noise_idx, k] = 0

    return corr_img
def _surr_rank_order(values):
    """Return the rank (0 = smallest) of every element along the last axis."""
    return np.argsort(np.argsort(values, axis=-1), axis=-1)


def _surr_randomise_phases(dc, positive, eta, L):
    """Build length-L Hermitian-symmetric spectra with phases shifted by eta.

    ``dc`` fills bin 0, ``positive * exp(1j * eta)`` fills the positive
    frequency bins and their mirrored conjugates fill the negative bins, so
    that the inverse transform of every row is real.
    """
    N, half = eta.shape
    spec = np.zeros((N, L), dtype=complex)
    spec[:, 0] = dc
    spec[:, 1:half + 1] = positive * np.exp(1j * eta)
    spec[:, L - half:] = np.conj(np.fliplr(spec[:, 1:half + 1]))
    return spec


def _surr_ft(sig, N):
    """Fourier transform surrogates; returns (surrogates, random phases)."""
    L = len(sig)
    L2 = int(np.ceil(L / 2))
    eta = 2 * np.pi * np.random.rand(N, L2 - 1)
    ftsig = np.fft.fft(sig)
    ftrp = _surr_randomise_phases(ftsig[0], ftsig[1:L2], eta, L)
    # One spectrum per row: invert along the sample axis, not across rows.
    return np.real(np.fft.ifft(ftrp, axis=1)), eta


def _surr_aaft(sig, N):
    """Amplitude-adjusted Fourier transform surrogates."""
    L = len(sig)
    L2 = int(np.ceil(L / 2))
    eta = 2 * np.pi * np.random.rand(N, L2 - 1)
    val = np.sort(sig)
    rankind = _surr_rank_order(sig)

    # Gaussian noise re-ordered so every row follows the rank order of sig.
    gn = np.sort(np.random.randn(N, L), axis=1)[:, rankind]
    ftgn = np.fft.fft(gn, axis=1)

    # The value in bin 0 only offsets each row by a constant, which cannot
    # change the rank ordering used below.
    spec = _surr_randomise_phases(gn[:, 0], ftgn[:, 1:L2], eta, L)
    shuffled = np.real(np.fft.ifft(spec, axis=1))

    # Map the original amplitudes onto the rank order of the shuffled data.
    return val[_surr_rank_order(shuffled)]


def _surr_iaaft1(sig, N, maxit=1000):
    """Iterated AAFT surrogates that keep the exact amplitude distribution."""
    L = len(sig)
    val = np.sort(sig)
    amplitudes = np.abs(np.fft.fft(sig))

    surr = np.empty((N, L), dtype=np.float64)
    for j in range(N):
        surr[j, :] = sig[randperm(L)]

    irank = np.tile(_surr_rank_order(sig), (N, 1))
    # -1 never occurs as a rank, so every row counts as changed at first.
    oldrank = np.full((N, L), -1)

    it = 1
    while it < maxit:
        # Only rows whose rank ordering changed in the last pass iterate.
        inc = np.flatnonzero(np.any(oldrank != irank, axis=1))
        if inc.size == 0:
            break
        oldrank = irank.copy()

        # Impose the target spectrum while keeping the current phases ...
        phases = np.angle(np.fft.fft(surr[inc, :], axis=1))
        iterf = np.real(
            np.fft.ifft(amplitudes * np.exp(1j * phases), axis=1))

        # ... then restore the original amplitude distribution by rank.
        newrank = _surr_rank_order(iterf)
        irank[inc, :] = newrank
        surr[inc, :] = val[newrank]
        it += 1
    return surr


def _surr_cpp(sig, N):
    """Cycle phase permutation surrogates of the phase signal ``sig``."""
    L = len(sig)
    surr = np.empty((N, L), dtype=np.float64)
    signal = np.mod(sig, 2 * np.pi)

    # Index of the last sample before each 2*pi wrap.  np.nonzero returns a
    # tuple, so the index array has to be unpacked from it.
    dcpoints = np.nonzero((signal[1:] - signal[:-1]) < -np.pi)[0]
    NC = len(dcpoints) - 1

    if NC > 0:
        # Complete cycles lie between consecutive wrap points; the partial
        # cycles at both ends keep their position.
        cycles = [signal[dcpoints[k] + 1:dcpoints[k + 1] + 1]
                  for k in range(NC)]
        stcycle = signal[:dcpoints[0] + 1]
        endcycle = signal[dcpoints[NC] + 1:]
        for sn in range(N):
            order = np.random.permutation(NC)
            pieces = [stcycle] + [cycles[c] for c in order] + [endcycle]
            surr[sn, :] = np.unwrap(np.hstack(pieces))
    else:
        surr[:] = np.unwrap(signal)
    return surr


def surrogate_calc(time_series: Union[TimeSeries, ndarray], N: int, method: str, pp: bool, fs: float) -> Tuple[ndarray, "Params"]:
    """
    Calculates surrogates.

    :param time_series: the original signal as a TimeSeries (its ``signal``
        attribute is used) or directly as an array
    :param N: the number of surrogates
    :param method: the required surrogate type
    :param pp: whether to perform preprocessing
    :param fs: the sampling frequency
    :return: the surrogate signal(s), one per row, and params
    :raises NotImplementedError: for the surrogate types that have no
        implementation yet (previously an uninitialised array was returned)
    :raises ValueError: if ``method`` is not a known surrogate type
    """
    if isinstance(time_series, TimeSeries):
        sig = time_series.signal
    else:
        sig = time_series

    params = Params()
    params.origsig = sig
    params.method = method
    params.type = method
    params.numsurr = N
    params.fs = fs

    if pp:
        sig, time, ks, ke = preprocessing(sig, fs)
        params.preprocessing = True
        params.cutsig = sig
        params.sigstart = ks
        params.sigend = ke
    else:
        # One time stamp per sample, spanning the signal duration in seconds.
        time = np.linspace(0, len(sig) / fs, len(sig))
        params.preprocessing = False
    params.time = time

    # Measured after preprocessing, which returns the signal actually used.
    L = len(sig)

    # Random permutation surrogates.
    if method == _RP:
        surr = np.empty((N, L), dtype=np.float64)
        for k in range(N):
            surr[k, :] = sig[randperm(L)]

    # Fourier transform surrogates.
    elif method == _FT:
        surr, params.rphases = _surr_ft(sig, N)

    # Amplitude-adjusted Fourier transform surrogates.
    elif method == _AAFT:
        surr = _surr_aaft(sig, N)

    # Iterated amplitude-adjusted Fourier transform with exact distribution.
    elif method == _IAFFT1:
        surr = _surr_iaaft1(sig, N)

    # Exact-spectrum IAAFT and wavelet IAAFT surrogates are not ported yet.
    elif method == _IAFFT2 or method == _WIAFFT:
        raise NotImplementedError(
            "surrogate method %r is not implemented" % (method,))

    # Time-shifted surrogates.
    elif method == _tshift:
        surr = np.empty((N, L), dtype=np.float64)
        for sn in range(N):
            startp = random.randint(1, L - 1)
            surr[sn, :] = np.hstack([sig[startp:L], sig[:startp]])

    # Cycle phase permutation surrogates.
    elif method == _CPP:
        surr = _surr_cpp(sig, N)

    else:
        raise ValueError("unknown surrogate method %r" % (method,))

    return surr, params
def ba_init(x, y, K):
    """
    Initializes max-affine fit to data (y, x)
    ensures that initialization has at least K+1 points per partition (i.e.
    per affine function)

    K random data points are chosen as partition centres, every point is
    assigned to its nearest centre, and an affine function is fitted to each
    partition by least squares.  A partition whose regressor matrix is rank
    deficient is grown with the points nearest to its centre (partitions may
    then overlap) until the fit is well posed or no points are left, in
    which case the least-norm solution is used.

    INPUTS:
        x:      Independent variable data
                    2D column vector [nPoints x nDims]

        y:      Dependent variable data
                    2D column vector [nPoints x 1]

        K:      Number of affine functions (partitions)

    OUTPUTS:
        ba:     Initial b and a parameters
                    2D array [(dimx+1) x k]

    RAISES:
        ValueError: if there are fewer than K*(nDims+1) data points
    """
    defaults = {}
    defaults['bverbose'] = False
    options = defaults

    npt, dimx = x.shape
    if K*(dimx+1) > npt:
        raise ValueError('Not enough data points')

    X = hstack((ones((npt, 1)), x))
    b = zeros((dimx+1, K))

    # Choose K unique indices
    randinds = randperm(npt)[0:K]

    # partition based on distances
    sqdists = zeros((npt, K))
    for k in range(K):
        sqdists[:, k] = ((x - x[randinds[k], :]) ** 2).sum(1)

    # index to closest k for each data pt
    mindistind = argmin(sqdists, axis=1)

    # loop through each partition, making local fits
    # note we expand partitions that result in singular least squares problems
    # why this way? some points will be shared by multiple partitions, but
    # resulting max-affine fit will tend to be good. (as opposed to solving
    # least-norm version)
    for k in range(K):
        inds = mindistind == k
        i = int(inds.sum())  # number of points in partition

        # before fitting, check rank and increase partition size if necessary
        # (this does create overlaps).  The size test comes first so that an
        # empty partition is never passed to matrix_rank.
        if i < dimx + 1 or matrix_rank(X[inds, :]) < dimx + 1:
            sortdistind = sqdists[:, k].argsort()
            iinit = i

            if i < dimx+1:
                # obviously, at least need dimx+1 points. fill these in before
                # checking any ranks.  0-based: the next nearest points are
                # sortdistind[i], ..., sortdistind[dimx].
                inds[sortdistind[i:dimx+1]] = True
                i = dimx+1

            # now add points until rank condition satisfied; sortdistind[i]
            # is the next candidate, and the bound stops the search once
            # every point has been tried.
            while i < npt and matrix_rank(X[inds, :]) < dimx+1:
                inds[sortdistind[i]] = True
                i = i+1

            if options['bverbose']:
                print("ba_init: Added %s points to partition %s to maintain "
                      "full rank for local fitting." % (i-iinit, k))

        # now create the local fit
        # Rank condition specified to default for python upgrades
        b[:, k] = lstsq(X[inds.nonzero()], y[inds.nonzero()],
                        rcond=-1)[0][:, 0]

    return b