def generate_random_telegraph_noise(
    how_many: int = 20000,
    save_to_file: bool = True,
    filename: Optional[str] = None,
) -> np.ndarray:
    """Generate 2D random telegraph noise samples.

    For each sample, switching events are drawn from a Poisson
    distribution and the signal toggles between two levels at every
    event. The signal, its Fourier spectrum and its gradient magnitude
    are stored.

    Args:
        how_many: Number of samples to generate.
        save_to_file: Whether to save the result to a .npy file.
        filename: Target filename; defaults to
            'random_telegraph_noise.npy' in nt.config['db_folder'].

    Returns:
        Array of shape (number of data types, how_many, np.prod(N_2D)).
    """
    condensed_data_all = np.empty(
        [len(nt.config["core"]["data_types"]) - 1, 0, np.prod(N_2D)]
    )
    for niter in range(how_many):
        condensed_data = np.empty(
            [len(nt.config["core"]["data_types"]) - 1, 1, np.prod(N_2D)]
        )
        x = np.ones(N_2D)
        s = 1

        lam = np.random.uniform(0, 0.2, 1)
        trnsp = np.random.randint(2, size=1)
        # Draw switching events; clip to at most one switch per pixel.
        poisson = np.random.poisson(lam=lam, size=N_2D)
        poisson[poisson > 1] = 1

        for ix in range(N_2D[0]):
            for iy in range(N_2D[1]):
                if poisson[ix, iy] == 1:
                    s *= -1
                x[ix, iy] *= s
        if trnsp:
            x = x.T
        # Map the two levels from {-1, 1} to {0, 1}.
        x = (x + 1) / 2

        noise_spect = fp.frequencies2(x)
        noise_spect = fp.frequenciesshift(noise_spect)
        noise_spect = np.abs(noise_spect)

        grad = generic_gradient_magnitude(x, sobel)

        index = nt.config["core"]["data_types"]["signal"]
        condensed_data[index, 0, :] = x.flatten()

        index = nt.config["core"]["data_types"]["frequencies"]
        condensed_data[index, 0, :] = noise_spect.flatten()

        index = nt.config["core"]["data_types"]["gradient"]
        condensed_data[index, 0, :] = grad.flatten()

        condensed_data_all = np.concatenate(
            (condensed_data_all, condensed_data), axis=1
        )

    if save_to_file:
        if filename is None:
            filename = "random_telegraph_noise.npy"
        path = os.path.join(nt.config["db_folder"], filename)
        np.save(path, condensed_data_all)

    return condensed_data_all
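
# Usage sketch (illustrative, not part of the module): generate a small
# RTN batch without writing to disk and recover the first image.
#
#     rtn = generate_random_telegraph_noise(how_many=10, save_to_file=False)
#     sig_idx = nt.config["core"]["data_types"]["signal"]
#     first_image = rtn[sig_idx, 0].reshape(N_2D)
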
def add_random_charge_shifts(
    original_data: np.ndarray,
    number_of_samples: int,
) -> Tuple[np.ndarray, np.ndarray]:
    """Add random charge shifts to a subset of the input data.

    A randomly chosen band of rows or columns is duplicated at a random
    position, shifting the remainder of the image, before the image is
    resized back to its original shape.

    Args:
        original_data: Array of images, first axis indexing samples.
        number_of_samples: Number of images to which a shift is added.

    Returns:
        The modified data and the magnitude of its shifted Fourier
        spectrum.
    """
    data = np.copy(original_data)
    data_idx = np.random.choice(
        original_data.shape[0], number_of_samples, replace=False
    ).astype(int)
    org_shape = data.shape

    for idx in data_idx:
        ex_data = np.squeeze(data[idx])
        n_diff = np.random.randint(5, 9)
        min_d = n_diff // 2
        # Pick the band position so that the duplicated band lies fully
        # inside the image.
        n_step = np.random.randint(min_d, ex_data.shape[0] - min_d)
        transpose = np.random.randint(2)

        if transpose:
            # Duplicate a horizontal band of rows around n_step.
            new_img1 = np.concatenate(
                (
                    ex_data[:n_step, :],
                    ex_data[n_step - min_d:n_step + min_d, :],
                    ex_data[n_step + min_d:, :],
                ),
                axis=0,
            )
        else:
            # Duplicate a vertical band of columns around n_step.
            new_img1 = np.concatenate(
                (
                    ex_data[:, :n_step],
                    ex_data[:, n_step - min_d:n_step + min_d],
                    ex_data[:, n_step + min_d:],
                ),
                axis=1,
            )
        new_img1 = resize(new_img1, N_2D)
        data[idx] = new_img1.reshape(1, *org_shape[1:])

    m = data.shape[0]
    data = np.reshape(data, (m, *N_2D))
    freq_data = fp.frequencies2(data)
    freq_data = np.abs(fp.frequenciesshift(freq_data))

    data = data.reshape(*org_shape)
    freq_data = freq_data.reshape(*org_shape)

    return data, freq_data
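
# Usage sketch (illustrative): shift charge jumps into five images of a
# batch. `images` is a hypothetical array of shape (n_samples, *N_2D).
#
#     shifted, shifted_freq = add_random_charge_shifts(images, number_of_samples=5)
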
def generate_current_drop(
    how_many: int = 20000,
    save_to_file: bool = True,
    filename: Optional[str] = None,
) -> np.ndarray:
    """Generate samples of a gradual current drop across the diagram.

    A linear ramp along the diagonal is passed through a tanh with
    random amplitude and offset, then normalized to [0, 1]. The signal,
    its Fourier spectrum and its gradient magnitude are stored.

    Args:
        how_many: Number of samples to generate.
        save_to_file: Whether to save the result to a .npy file.
        filename: Target filename; defaults to 'current_drop.npy' in
            nt.config['db_folder'].

    Returns:
        Array of shape (number of data types, how_many, np.prod(N_2D)).
    """
    condensed_data_all = np.empty(
        [len(nt.config["core"]["data_types"]) - 1, 0, np.prod(N_2D)]
    )
    for niter in range(how_many):
        condensed_data = np.empty(
            [len(nt.config["core"]["data_types"]) - 1, 1, np.prod(N_2D)]
        )
        xm, ym = np.meshgrid(np.linspace(0, 50, 50), np.linspace(0, 50, 50))
        # Linear ramp along the diagonal, normalized to [0, 1].
        drop = xm + ym
        drop = (drop - np.min(drop)) / (np.max(drop) - np.min(drop))

        amp = np.random.uniform(0, 10, 1)
        offset = np.random.uniform(-5, 5, 1)
        drop = np.tanh(amp * drop + offset)
        drop = (drop - np.min(drop)) / (np.max(drop) - np.min(drop))

        drop_freq = fp.frequencies2(drop)
        drop_freq = fp.frequenciesshift(drop_freq)
        drop_freq = np.abs(drop_freq)

        grad = generic_gradient_magnitude(drop, sobel)

        index = nt.config["core"]["data_types"]["signal"]
        condensed_data[index, 0, :] = drop.flatten()

        index = nt.config["core"]["data_types"]["frequencies"]
        condensed_data[index, 0, :] = drop_freq.flatten()

        index = nt.config["core"]["data_types"]["gradient"]
        condensed_data[index, 0, :] = grad.flatten()

        condensed_data_all = np.concatenate(
            (condensed_data_all, condensed_data), axis=1
        )

    if save_to_file:
        if filename is None:
            filename = "current_drop.npy"
        path = os.path.join(nt.config["db_folder"], filename)
        np.save(path, condensed_data_all)

    return condensed_data_all

def load_noise(
    noise_types: List[str],
    number_of_samples: int,
    files: Optional[Dict[str, str]] = None,
    folder: Optional[str] = None,
) -> Dict[str, np.ndarray]:
    """Load previously generated noise samples from disk.

    Args:
        noise_types: Noise types to load; each must be in NOISE_TYPES.
        number_of_samples: Number of samples drawn (with replacement)
            per noise type.
        files: Mapping from noise type to filename; defaults to
            DEFAULT_FILES.
        folder: Directory containing the files; defaults to
            nt.config['db_folder'].

    Returns:
        Dict mapping each noise type to an array of shape
        (2, number_of_samples, *N_2D), holding the signal and the real
        part of its Fourier spectrum.

    Note:
        Complex numbers are cast into floats here; this might need to
        be fixed if the frequencies do not give the desired result.
    """
    if files is None:
        files = DEFAULT_FILES
    if folder is None:
        folder = nt.config["db_folder"]

    all_noise = {}
    for ntype in noise_types:
        if ntype not in NOISE_TYPES:
            logger.error(
                "Unknown noise type. Choose one of the following: "
                + "{}".format(", ".join(NOISE_TYPES))
            )
            raise ValueError("Unknown noise type: {}".format(ntype))

        raw_noise = np.load(os.path.join(folder, files[ntype]))
        raw_noise = np.reshape(raw_noise[0, :, :], (raw_noise.shape[1], *N_2D))
        raw_noise = raw_noise[
            np.random.choice(
                len(raw_noise), number_of_samples, replace=True
            ).astype(int)
        ]

        raw_noise_freq = fp.frequencies2(raw_noise)
        raw_noise_freq = fp.frequenciesshift(raw_noise_freq)

        all_noise[ntype] = np.zeros((2, number_of_samples, *N_2D))
        all_noise[ntype][0] = raw_noise
        all_noise[ntype][1] = raw_noise_freq.real

    return all_noise
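
# Usage sketch (illustrative): draw 100 samples per noise type from the
# saved files. "random_blobs" is assumed to be a valid entry of
# NOISE_TYPES, as suggested by add_noise below.
#
#     noise = load_noise(["random_blobs"], number_of_samples=100)
#     blob_images = noise["random_blobs"][0]   # signals
#     blob_spectra = noise["random_blobs"][1]  # real part of the spectra
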
def generate_random_blobs(
    how_many: int = 20000,
    save_to_file: bool = True,
    filename: Optional[str] = None,
    n_blobs: int = 15,
    stdx: Optional[List[float]] = None,
    stdy: Optional[List[float]] = None,
) -> np.ndarray:
    """Generate samples made up of random 2D Gaussian blobs.

    Each sample is a sum of n_blobs Gaussians with random centers and
    standard deviations, normalized to [0, 1]. The signal, its Fourier
    spectrum and its gradient magnitude are stored.

    Args:
        how_many: Number of samples to generate.
        save_to_file: Whether to save the result to a .npy file.
        filename: Target filename; defaults to 'random_blobs.npy' in
            nt.config['db_folder'].
        n_blobs: Number of Gaussian blobs per sample.
        stdx: Range of standard deviations in x; defaults to [0.3, 0.8].
        stdy: Range of standard deviations in y; defaults to [0.3, 0.8].

    Returns:
        Array of shape (number of data types, how_many, np.prod(N_2D)).
    """
    if stdx is None:
        stdx = [0.3, 0.8]
    if stdy is None:
        stdy = [0.3, 0.8]

    condensed_data_all = np.empty(
        [len(nt.config["core"]["data_types"]) - 1, 0, np.prod(N_2D)]
    )
    for niter in range(how_many):
        condensed_data = np.empty(
            [len(nt.config["core"]["data_types"]) - 1, 1, np.prod(N_2D)]
        )
        # Build the grid explicitly to match N_2D (np.linspace defaults
        # to 50 points).
        x = np.linspace(-1, 1, N_2D[0])
        y = np.linspace(-1, 1, N_2D[1])
        x, y = np.meshgrid(x, y)

        z = np.zeros(N_2D)
        for n_blob in range(n_blobs):
            z += gauss2d(
                x,
                y,
                mx=np.random.uniform(-1, 1, 1),
                my=np.random.uniform(-1, 1, 1),
                sx=np.random.uniform(*stdx, 1),
                sy=np.random.uniform(*stdy, 1),
            )
        z = (z - np.min(z)) / (np.max(z) - np.min(z))

        noise_spect = fp.frequencies2(z)
        noise_spect = fp.frequenciesshift(noise_spect)
        noise_spect = np.abs(noise_spect)

        grad = generic_gradient_magnitude(z, sobel)

        index = nt.config["core"]["data_types"]["signal"]
        condensed_data[index, 0, :] = z.flatten()

        index = nt.config["core"]["data_types"]["frequencies"]
        condensed_data[index, 0, :] = noise_spect.flatten()

        index = nt.config["core"]["data_types"]["gradient"]
        condensed_data[index, 0, :] = grad.flatten()

        condensed_data_all = np.concatenate(
            (condensed_data_all, condensed_data), axis=1
        )

    if save_to_file:
        if filename is None:
            filename = "random_blobs.npy"
        path = os.path.join(nt.config["db_folder"], filename)
        np.save(path, condensed_data_all)

    return condensed_data_all

def generate_one_f_noise(
    how_many: int = 20000,
    save_to_file: bool = True,
    filename: Optional[str] = None,
) -> np.ndarray:
    """Generate 2D 1/f noise samples.

    A power spectrum proportional to 1/f with random phases is
    transformed back into real space and normalized to [0, 1]. The
    signal, its power spectrum and its gradient magnitude are stored.

    Args:
        how_many: Number of samples to generate.
        save_to_file: Whether to save the result to a .npy file.
        filename: Target filename; defaults to 'one_over_f_noise.npy'
            in nt.config['db_folder'].

    Returns:
        Array of shape (number of data types, how_many, np.prod(N_2D)).
    """
    fx_1d = fp.frequenciesshift(fp.frequenciesfreq(1000, d=0.02))

    condensed_data_all = np.empty(
        [len(nt.config["core"]["data_types"]) - 1, 0, np.prod(N_2D)]
    )
    for niter in range(how_many):
        condensed_data = np.empty(
            [len(nt.config["core"]["data_types"]) - 1, 1, np.prod(N_2D)]
        )
        fx, fy = np.meshgrid(fx_1d, fx_1d, indexing="ij")
        f = np.sqrt(fx ** 2 + fy ** 2)
        # 1/f amplitude; the zero-frequency component is left at zero.
        f[f > 0] = np.divide(1, f[f > 0])

        # Attach random phases to the 1/f amplitudes.
        phases = np.random.uniform(low=0, high=2 * np.pi, size=f.shape)
        power_spect = np.multiply(f, np.exp(1j * phases))

        noise = np.abs(fp.ifrequencies2(power_spect))
        noise = (noise - np.min(noise)) / (np.max(noise) - np.min(noise))

        grad = generic_gradient_magnitude(noise, sobel)

        noise = resize(noise, N_2D, anti_aliasing=True, mode="constant").flatten()
        grad = resize(grad, N_2D, anti_aliasing=True, mode="constant").flatten()
        power_spect = resize(
            np.abs(power_spect), N_2D, anti_aliasing=True, mode="constant"
        ).flatten()

        index = nt.config["core"]["data_types"]["signal"]
        condensed_data[index, 0, :] = noise

        index = nt.config["core"]["data_types"]["frequencies"]
        condensed_data[index, 0, :] = power_spect

        index = nt.config["core"]["data_types"]["gradient"]
        condensed_data[index, 0, :] = grad

        condensed_data_all = np.concatenate(
            (condensed_data_all, condensed_data), axis=1
        )

    if save_to_file:
        if filename is None:
            filename = "one_over_f_noise.npy"
        path = os.path.join(nt.config["db_folder"], filename)
        np.save(path, condensed_data_all)

    return condensed_data_all
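
# Minimal numpy-only sketch of the same 1/f construction, assuming the
# fp helpers wrap numpy.fft (fft2, fftshift, ifft2, fftfreq); handy for
# checking the generator in isolation:
#
#     import numpy as np
#     freqs = np.fft.fftshift(np.fft.fftfreq(1000, d=0.02))
#     fx, fy = np.meshgrid(freqs, freqs, indexing="ij")
#     f = np.sqrt(fx**2 + fy**2)
#     f[f > 0] = 1 / f[f > 0]
#     phases = np.random.uniform(0, 2 * np.pi, f.shape)
#     noise = np.abs(np.fft.ifft2(f * np.exp(1j * phases)))
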
def save_augmented_data(
    original_raw_data: np.ndarray,
    new_path: str,
    new_filename: str,
    mult_factor: int,
    write_period: int = 200,
    max_samples: int = 20000,
    data_types: Optional[List[str]] = None,
) -> None:
    """Augment labelled data via random transformations and save it.

    Each original image is transformed mult_factor times with
    random_transformation; the transformed signal, its Fourier spectrum
    and its gradient magnitude are appended, together with the original
    label, to the file at new_path/new_filename every write_period
    samples.

    Args:
        original_raw_data: Array of shape (number of data types,
            number of samples, np.prod(shape) + 1), with labels in the
            last column.
        new_path: Directory of the output file.
        new_filename: Name of the output file.
        mult_factor: Number of augmented copies per original image.
        write_period: Number of samples accumulated between writes.
        max_samples: Maximum number of augmented samples to generate.
        data_types: Data types to save; defaults to
            ['signal', 'frequencies']. Currently unused.
    """
    # TODO: Is this method finished?
    if data_types is None:
        data_types = ["signal", "frequencies"]
    total_counter = 0
    write_curr = 0
    shape = (50, 50)
    new_path = os.path.join(new_path, new_filename)

    index_sig = nt.config["core"]["data_types"]["signal"]
    index_freq = nt.config["core"]["data_types"]["frequencies"]
    index_grad = nt.config["core"]["data_types"]["gradient"]
    n_indx = len(nt.config["core"]["data_types"])

    condensed_data_all = np.empty((n_indx, 0, np.prod(shape) + 1))

    original_images = np.squeeze(original_raw_data[index_sig, :, :-1])
    original_labels = original_raw_data[0, :, -1]

    if not os.path.exists(new_path):
        np.save(new_path, condensed_data_all)

    stop = False
    for it in range(mult_factor):
        for orig_image, orig_label in zip(original_images, original_labels):
            orig_image = orig_image.reshape(shape)
            condensed_data = np.empty((n_indx, 1, np.prod(shape) + 1))

            new_img = random_transformation(orig_image, single=False)
            condensed_data[index_sig, 0, :] = np.append(
                new_img.flatten(), orig_label
            )

            # Detrend before computing the spectrum.
            dtrnd = sg.detrend(new_img, axis=0)
            dtrnd = sg.detrend(dtrnd, axis=1)
            frequencies_res = fp.frequencies2(dtrnd)
            frequencies_res = np.abs(fp.frequenciesshift(frequencies_res))
            data_frq = resize(
                frequencies_res, shape, anti_aliasing=True, mode="constant"
            ).flatten()
            condensed_data[index_freq, 0, :] = np.append(data_frq, orig_label)

            grad = generic_gradient_magnitude(new_img, sobel)
            gradient_resized = resize(
                grad, shape, anti_aliasing=True, mode="constant"
            ).flatten()
            condensed_data[index_grad, 0, :] = np.append(
                gradient_resized, orig_label
            )

            condensed_data_all = np.append(
                condensed_data_all, condensed_data, axis=1
            )
            write_curr += 1
            total_counter += 1

            if write_curr >= write_period:
                # Append the accumulated samples to the file.
                previous_data = np.load(new_path)
                all_data = np.append(previous_data, condensed_data_all, axis=1)
                np.save(new_path, all_data)
                condensed_data_all = np.empty((n_indx, 0, np.prod(shape) + 1))
                write_curr = 0

            if total_counter >= max_samples:
                stop = True
                break
        if stop:
            break

    previous_data = np.load(new_path)
    all_data = np.append(previous_data, condensed_data_all, axis=1)
    np.save(new_path, all_data)
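
# Usage sketch (illustrative; note the TODO above): augment a labelled
# dataset five-fold. `raw_data` is a hypothetical array of shape
# (number of data types, n_samples, 50 * 50 + 1) with labels in the
# last column.
#
#     save_augmented_data(
#         raw_data,
#         new_path=nt.config["db_folder"],
#         new_filename="augmented.npy",
#         mult_factor=5,
#     )
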
def add_noise(
    original_data: np.ndarray,
    noise_types: List[str],
    max_strength: List[float],
    n_samples: Optional[int] = None,
    in_current: bool = True,
    min_strength: Optional[List[float]] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """Add previously generated noise to measurement data.

    Noise of type 'current_drop' or 'random_blobs' is applied
    multiplicatively, all other types additively, with strengths drawn
    uniformly between min_strength and max_strength. Noise is added
    either in current space (in_current=True) or in frequency space.

    Args:
        original_data: Array of images, first axis indexing samples.
        noise_types: Noise types to add; each must be in NOISE_TYPES.
        max_strength: Maximum noise strength per noise type.
        n_samples: Number of samples to which noise is added; defaults
            to all of them.
        in_current: Whether to add noise in current space rather than
            in frequency space.
        min_strength: Minimum noise strength per noise type; defaults
            to zeros.

    Returns:
        The noisy data and its Fourier spectrum, each of shape
        (number of samples, *N_2D, 1).
    """
    assert len(noise_types) == len(max_strength)
    noisy_data = np.copy(original_data)
    m = noisy_data.shape[0]

    if min_strength is None:
        min_strength = [0] * len(noise_types)
    if n_samples is None:
        n_samples = m
    if not in_current:
        org_max = np.max(noisy_data.reshape(m, -1), axis=1)

    noisy_data = np.reshape(noisy_data, (m, *N_2D))
    noisy_freq = fp.frequencies2(noisy_data)
    noisy_freq = fp.frequenciesshift(noisy_freq)
    # Work on flattened samples so noise can be applied per row.
    noisy_freq = np.reshape(noisy_freq, (m, -1))
    noisy_data = np.reshape(noisy_data, (m, -1))

    raw_noise = load_noise(noise_types, m)

    for inn, ntype in enumerate(noise_types):
        if ntype not in NOISE_TYPES:
            logger.error(
                "Unknown noise type. Choose one of the following: "
                + "{}".format(", ".join(NOISE_TYPES))
            )
            raise ValueError("Unknown noise type: {}".format(ntype))

        # Random strength per sample; samples beyond n_samples get zero.
        amp = np.random.uniform(
            min_strength[inn], max_strength[inn], (n_samples, 1)
        )
        amp = np.append(amp, np.zeros((m - n_samples, 1)))
        p = np.random.permutation(len(amp))
        amp = amp[p].reshape(m, 1)

        if in_current:
            noise = raw_noise[ntype][0]
            old_max = np.max(noisy_data, axis=1).reshape(m, 1)
            if ntype in ["current_drop", "random_blobs"]:
                # Multiplicative noise; zero-strength samples are
                # multiplied by ones, i.e. left unchanged.
                noise = amp * noise.reshape(m, -1)
                idx = np.where(amp == 0)[0]
                noise[idx] = np.ones(noise.shape[-1])
                noisy_data = noisy_data * noise
            else:
                noisy_data = noisy_data + amp * noise.reshape(m, -1)
            # Rescale to retain each sample's original maximum.
            new_max = np.max(noisy_data, axis=1).reshape(m, 1)
            noisy_data = noisy_data * old_max / new_max
        else:
            noise = raw_noise[ntype][1]
            if ntype in ["current_drop", "random_blobs"]:
                noise = amp * noise.reshape(m, -1)
                idx = np.where(amp == 0)[0]
                noise[idx] = np.ones(noise.shape[-1])
                noisy_freq = noisy_freq * noise
            else:
                noisy_freq = noisy_freq + amp * noise.reshape(m, -1)

    if in_current:
        noisy_data = np.reshape(noisy_data, (m, *N_2D))
        noisy_freq = fp.frequencies2(noisy_data)
        noisy_freq = fp.frequenciesshift(noisy_freq)
    else:
        noisy_freq = np.reshape(noisy_freq, (m, *N_2D))
        noisy_data = np.abs(fp.ifrequencies2(noisy_freq))
        # Rescale flattened samples to retain the original maxima.
        noisy_data = noisy_data.reshape(m, -1)
        new_max = np.max(noisy_data, axis=1).reshape(m, 1)
        noisy_data = noisy_data * org_max.reshape(m, 1) / new_max

    noisy_freq = np.reshape(noisy_freq, (m, *N_2D, 1))
    noisy_data = np.reshape(noisy_data, (m, *N_2D, 1))

    return noisy_data, noisy_freq
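
# Usage sketch (illustrative, not part of the module): pre-generate
# noise files once, then overlay noise onto a batch of measurements.
# `clean_data` is a hypothetical array of shape (n_samples, *N_2D);
# this assumes DEFAULT_FILES maps "random_blobs" and "current_drop" to
# the generators' default filenames.
#
#     generate_random_blobs(how_many=1000)
#     generate_current_drop(how_many=1000)
#     noisy, noisy_freq = add_noise(
#         clean_data,
#         noise_types=["random_blobs", "current_drop"],
#         max_strength=[0.5, 1.0],
#     )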