def gen(l, m, n, number_of_instances, noise):
    assert l <= m <= n  # precondition

    # balanced dataset
    p_number_of_instances = int(number_of_instances / 2)
    n_number_of_instances = number_of_instances - p_number_of_instances

    # positive examples: exactly l of the first m features are active
    p_y = np.ones((p_number_of_instances, 1), dtype=int)
    p_x_first_part = np.zeros((p_number_of_instances, m), dtype=int)
    p_x_second_part = (np.random.random((p_number_of_instances, n - m)) < 0.5)
    p_x_second_part = p_x_second_part.astype(int)
    # column-wise append
    p_x = np.append(p_x_first_part, p_x_second_part, axis=1)
    for i in range(p_number_of_instances):
        candidates = np.random.permutation(m)
        n_nonzeros = l
        active_features = candidates[:n_nonzeros]
        p_x[i, active_features] = 1  # set non-zeros to 1

    # negative examples: only l - 2 of the first m features are active
    n_y = -1 * np.ones((n_number_of_instances, 1), dtype=int)
    n_x_first_part = np.zeros((n_number_of_instances, m), dtype=int)
    n_x_second_part = (np.random.random((n_number_of_instances, n - m)) < 0.5)
    n_x_second_part = n_x_second_part.astype(int)
    # column-wise append
    n_x = np.append(n_x_first_part, n_x_second_part, axis=1)
    for i in range(n_number_of_instances):
        candidates = np.random.permutation(m)
        n_nonzeros = l - 2
        active_features = candidates[:n_nonzeros]
        n_x[i, active_features] = 1  # set non-zeros to 1

    # stack positive and negative examples, then shuffle rows
    y = np.append(p_y, n_y)
    x = np.append(p_x, n_x, axis=0)
    shuffle_indices = np.random.permutation(number_of_instances)
    y = y[shuffle_indices]
    x = x[shuffle_indices, :]

    # sanity check
    validate_dataset(y, x, l, m, n, number_of_instances)

    if noise:
        noise_y_rate = 0.05
        noise_x_rate = 0.001
        return add_noise(y, x, noise_y_rate, noise_x_rate)
    return (y, x)
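# A minimal usage sketch (hypothetical, not part of the original module):
# draw a small noise-free dataset and check its shape and class balance.
# Assumes numpy is imported as np and that validate_dataset/add_noise are
# defined alongside gen.
if __name__ == "__main__":
    y, x = gen(l=3, m=5, n=10, number_of_instances=100, noise=False)
    assert x.shape == (100, 10)
    # positives carry l active features among the first m columns, negatives
    # only l - 2, so a threshold on those columns separates the classes
    print("positives:", np.sum(y == 1), "negatives:", np.sum(y == -1))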
max_features = params['max_features']
max_depth = params['max_depth']
loss = params['loss']
learning_rate = params['learning_rate']

# drop individuals with missing phenotypes from both y and the genotype matrix
missing_phenos = y[y.isnull()].index.values
y = y.drop(missing_phenos, axis=0)
geno_c = geno.copy()
geno_c = geno_c.drop(missing_phenos, axis=0)

r2s = []
y = y.to_numpy()
for (j, noise_ratio) in enumerate(noise_ratios):
    r2s_n = []
    for k in np.arange(M):
        y_n = add_noise(y, noise_ratio)
        X_train, X_test, y_train, y_test = train_test_split(geno_c, y_n, test_size=0.3)
        X_train = X_train.drop(columns=["Unnamed: 0"]).values
        X_test = X_test.drop(columns=["Unnamed: 0"]).values

        # standardize the target using training-set statistics only,
        # to avoid leaking test-set information
        y_train_std = (y_train - np.mean(y_train)) / np.std(y_train)
        y_test_std = (y_test - np.mean(y_train)) / np.std(y_train)

        rf = GradientBoostingRegressor(n_estimators=n_estimators,
                                       min_samples_split=min_samples_split,
                                       min_samples_leaf=min_samples_leaf,
                                       max_features=max_features,
                                       max_depth=max_depth,
                                       loss=loss,
                                       learning_rate=learning_rate,
                                       subsample=1)
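# The add_noise helper used above is defined elsewhere. A minimal sketch under
# the assumption (not confirmed by this file) that it injects zero-mean
# Gaussian noise whose variance is noise_ratio times the phenotype variance:
def add_noise_sketch(y, noise_ratio):
    """Hypothetical stand-in for the imported add_noise."""
    sigma = np.sqrt(noise_ratio * np.var(y))
    return y + np.random.normal(0.0, sigma, size=y.shape)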
import numpy as np
import matplotlib.image as mpimg
from add_noise import add_noise
from simulated_annealing import simulated_anealing

################################
#   read image and add noise   #
################################

# read the noise-free image
img_noise_free = mpimg.imread('test1.bmp')

# convert it to gray level (a two-dimensional matrix instead of a
# three-dimensional one) and scale to the range [0, 1]
img_noise_free_gl = np.double(np.average(img_noise_free, weights=[0.299, 0.587, 0.114], axis=2)) / 255

# add Gaussian noise to the image
variance = 0.05
img_gaussian_noise = add_noise(img=img_noise_free_gl, mean=0, variance=variance)

############################################
#   find parameters for singleton clique   #
############################################

# prior of each class - equal priors are assumed
priors = {0: 1/3, 127: 1/3, 255: 1/3}

# three regions in the noisy image that belong to different classes;
# we use these regions to estimate the mean and standard deviation
# of each class
regions = {0: (15, 15), 127: (120, 15), 255: (171, 149)}
len_of_region = 50

# calculate P(feature | class i); each likelihood is a Gaussian distribution
conditional_p = {0: None, 127: None, 255: None}
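# A minimal sketch (hypothetical) of how the three Gaussian likelihoods could
# be filled in from the hand-picked regions: treat each (row, col) entry as
# the upper-left corner of a len_of_region x len_of_region patch and fit a
# normal distribution to its pixels.
from scipy.stats import norm

for label, (row, col) in regions.items():
    patch = img_gaussian_noise[row:row + len_of_region, col:col + len_of_region]
    conditional_p[label] = norm(loc=np.mean(patch), scale=np.std(patch))
# conditional_p[label].pdf(value) then gives P(feature | class label)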
def save_distributed_files(truth_type=None, corpus=None):
    """
    Save input and processed files for tests.

    @param truth_type None for both, 'raw' for non-processed and 'eq' for equalized
    """
    print("Started saving chunks of data")
    corpus_len = len(corpus)
    max_val = 0
    max_stft_len = 0

    # Find the maximum length of the time-series data, used for padding
    for i in range(corpus_len):
        if len(corpus[i].data) > max_val:
            max_val = len(corpus[i].data)

    is_eq = False
    is_raw = False
    is_both = True
    if truth_type == 'eq':
        is_eq = True
        is_both = False
    elif truth_type == 'raw':
        is_raw = True
        is_both = False

    X = []
    y_eq = None
    y_raw = None
    if is_eq or is_both:
        y_eq = []
    if is_raw or is_both:
        y_raw = []

    memory_counter = 0
    pad_length = 0
    total_time = 0

    # Take each sentence from the corpus, add random noise/echoes/both to the
    # input, and preprocess the output. Also pad all signals to pad_length.
    for i in range(corpus_len):
        start = datetime.datetime.now()
        pad_length = max_val + NFFT // 2
        if pad_length % STACKED_FRAMES != 0:
            pad_length += (STACKED_FRAMES - (pad_length % STACKED_FRAMES))

        # Original data in the time domain
        data_orig_td = corpus[i].data.astype(np.float64)
        yi = pad(deepcopy(data_orig_td), pad_length)

        # Sampling frequency
        fs = corpus[i].fs

        # Pad the transformed signals
        echosample = add_echoes(data_orig_td)
        noisesample = add_noise(data_orig_td, path(NOISE_DIR, "RainNoise.flac"))
        orig_sample = data_orig_td

        echosample = pad(echosample, pad_length)
        noisesample = pad(noisesample, pad_length)
        orig_sample = pad(orig_sample, pad_length)

        # Equalize data for high-frequency hearing loss
        data_eq = None
        if is_eq or is_both:
            data_eq, _ = process_sentence(yi, fs=fs)
            yi_stft_eq = librosa.core.stft(data_eq, n_fft=NFFT, hop_length=HOP_LENGTH, center=True)
            yi_stft_eq = librosa.util.normalize(yi_stft_eq, axis=0)
            y_eq.append(np.abs(yi_stft_eq).T)

        # Use the non-processed input, padded as well
        data_raw = None
        if is_raw or is_both:
            data_raw = deepcopy(yi)
            yi_stft_raw = librosa.core.stft(data_raw, n_fft=NFFT, hop_length=HOP_LENGTH, center=True)
            yi_stft_raw = librosa.util.normalize(yi_stft_raw, axis=0)
            y_raw.append(np.abs(yi_stft_raw).T)

        # Randomize which sample (noisy or original) is used as the input
        rand = random.randint(0, 1)
        random_sample_stft = None
        if rand == 0:
            random_sample_stft = librosa.core.stft(noisesample, n_fft=NFFT, hop_length=HOP_LENGTH, center=True)
        else:
            random_sample_stft = librosa.core.stft(orig_sample, n_fft=NFFT, hop_length=HOP_LENGTH, center=True)
        max_stft_len = random_sample_stft.shape[1]
        random_sample_stft = librosa.util.normalize(random_sample_stft, axis=0)
        X.append(np.abs(random_sample_stft).T)
        # print("Padded {}".format(i))

        dt = datetime.datetime.now() - start
        total_time += dt.total_seconds() * 1000
        avg_time = total_time / (i + 1)

        # Every CHUNK sentences, flush the buffers to disk to bound memory use
        if i % CHUNK == CHUNK - 1:
            print("Time taken for {}: {}ms".format(i, (i + 1) * avg_time))
            print("Saving temp npy file to CHUNK {}".format(memory_counter))

            # Convert to np arrays
            size = 0
            if is_eq or is_both:
                y_eq_temp = np.array(y_eq)
                size += sys.getsizeof(y_eq_temp)
            if is_raw or is_both:
                y_raw_temp = np.array(y_raw)
                size += sys.getsizeof(y_raw_temp)
            X_temp = np.array(X)
            size += sys.getsizeof(X_temp)
            print("Memory used: {}".format(size / (1024 * 1024)))

            # Save files
            np.save(os.path.join(PP_DATA_DIR, "model", "inputs_{}.npy".format(memory_counter)),
                    X_temp, allow_pickle=True)
            if is_eq or is_both:
                np.save(os.path.join(PP_DATA_DIR, "model", "truths_eq_{}.npy".format(memory_counter)),
                        y_eq_temp, allow_pickle=True)
            if is_raw or is_both:
                np.save(os.path.join(PP_DATA_DIR, "model", "truths_raw_{}.npy".format(memory_counter)),
                        y_raw_temp, allow_pickle=True)

            # Reset the in-memory buffers for the next chunk
            X = []
            y_eq = None
            y_raw = None
            if is_eq or is_both:
                y_eq = []
            if is_raw or is_both:
                y_raw = []
            memory_counter += 1

    # Save whatever remains after the last full chunk
    if corpus_len % CHUNK > 0:
        # Convert to np arrays
        if is_eq or is_both:
            y_eq_temp = np.array(y_eq)
        if is_raw or is_both:
            y_raw_temp = np.array(y_raw)
        X_temp = np.array(X)
        end_len = len(X)

        # Save temp files
        np.save(os.path.join(PP_DATA_DIR, "model", "inputs_{}.npy".format(memory_counter)),
                X_temp, allow_pickle=True)
        if is_eq or is_both:
            np.save(os.path.join(PP_DATA_DIR, "model", "truths_eq_{}.npy".format(memory_counter)),
                    y_eq_temp, allow_pickle=True)
        if is_raw or is_both:
            np.save(os.path.join(PP_DATA_DIR, "model", "truths_raw_{}.npy".format(memory_counter)),
                    y_raw_temp, allow_pickle=True)
        print("Saved blocks {}:{}".format(0, memory_counter * CHUNK + end_len))
        memory_counter += 1

    # Persist bookkeeping values so a loader can reconstruct the chunk layout
    memory_counter = np.array(memory_counter)
    max_stft_len = np.array(max_stft_len)
    corpus_len = np.array(corpus_len)

    np.save(os.path.join(PP_DATA_DIR, "model", "memory_counter"), memory_counter)
    np.save(os.path.join(PP_DATA_DIR, "model", "max_stft_len"), max_stft_len)
    np.save(os.path.join(PP_DATA_DIR, "model", "corpus_len"), corpus_len)

    return memory_counter, max_stft_len, truth_type, corpus_len
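# A matching loader is not shown in this file. A minimal sketch (hypothetical
# name, same directory layout as above) that streams the saved chunks back in
# order, one (inputs, truths_eq, truths_raw) triple per chunk:
def load_distributed_files(truth_type=None):
    """Hypothetical helper; assumes the files written by save_distributed_files."""
    model_dir = os.path.join(PP_DATA_DIR, "model")
    n_chunks = int(np.load(os.path.join(model_dir, "memory_counter.npy")))
    for c in range(n_chunks):
        X = np.load(os.path.join(model_dir, "inputs_{}.npy".format(c)), allow_pickle=True)
        y_eq = y_raw = None
        if truth_type in (None, 'eq'):
            y_eq = np.load(os.path.join(model_dir, "truths_eq_{}.npy".format(c)), allow_pickle=True)
        if truth_type in (None, 'raw'):
            y_raw = np.load(os.path.join(model_dir, "truths_raw_{}.npy".format(c)), allow_pickle=True)
        yield X, y_eq, y_raw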
def diffusion():
    s = 0.4
    m = 0.3
    steps = 500
    ksize = 9
    sigma = 1.5
    gradksize = 9
    gradsigma = 1.5
    errors = []

    L = lab4.get_cameraman()
    L_noise = add_noise(L, 10)
    #L_noise = add_noise(L, 8)
    #L_noise = add_noise(L, 12)

    # ----------------- Multiplicative noise -------------------
    #L_log = np.log(L)
    #L_noise = add_noise(L_log, 40)
    #L_noise = np.exp(L_noise)

    # L = lab4.make_circle(128, 128, 120) * 255
    # gaussian = np.random.normal(0, 10, (L.shape[0], L.shape[1]))
    # L = L + gaussian

    fig1 = plt.figure(1)
    fig1.suptitle("Original Image w/ noise")
    plt.imshow(L_noise)

    fig2 = plt.figure(2)
    fig2.suptitle("Enhanced Image")

    L_init = np.copy(L_noise)
    ax1 = plt.subplot(111)
    im1 = ax1.imshow(L_noise)

    def init_func():
        pass

    def update(i):
        nonlocal L_noise
        # one diffusion step: estimate the structure tensor T, derive the
        # diffusion tensor D from it, and update the image using the Hessian
        T = estimate_T(L_noise, gradksize, gradsigma, ksize, sigma)
        D = estimate_D(T, m)
        HL = get_HL(L_noise)
        L_noise = L_noise + 0.5 * s * np.trace(D * HL, axis1=2, axis2=3)
        errors.append(np.sum(np.abs(L_noise - L)))
        # print(np.sum(np.abs(L_noise - L)))
        print("Steps:", i)
        im1.set_data(L_noise)

    ani = FuncAnimation(
        plt.gcf(),
        update,
        frames=range(steps),
        repeat=False,
        init_func=init_func,
    )
    plt.show()

    plt.plot(errors)
    plt.show()
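# get_HL is defined elsewhere in the lab code. A minimal sketch (hypothetical;
# the lab presumably uses smoothed derivative filters instead of np.gradient)
# that returns the per-pixel 2x2 Hessian as an array of shape (H, W, 2, 2),
# consistent with the np.trace(..., axis1=2, axis2=3) call above:
def get_HL_sketch(L):
    Ly, Lx = np.gradient(L)        # first derivatives along rows and columns
    Lyy, Lyx = np.gradient(Ly)     # second derivatives of Ly
    Lxy, Lxx = np.gradient(Lx)     # second derivatives of Lx
    HL = np.empty(L.shape + (2, 2))
    HL[..., 0, 0] = Lxx
    HL[..., 0, 1] = Lxy
    HL[..., 1, 0] = Lyx
    HL[..., 1, 1] = Lyy
    return HL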
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 31 21:25:07 2019

@author: Kuo
"""
from add_noise import add_noise

add_noise('insilico_size100_2_data', 15)
import os

import cv2 as cv
import numpy as np
from tifffile import imsave
from add_noise import add_noise
from PIL import Image

src_dir = './small_data'
dst_dir = './aug_data_small'

# walk two directory levels below src_dir, add noise to every image,
# and save the result as a .tif under the mirrored path in dst_dir
for directory in os.listdir(src_dir):
    #for root, dirs, files in os.walk(os.path.join(src_dir, directory)):
    for dir in os.listdir(os.path.join(src_dir, directory)):
        for file in os.listdir(os.path.join(src_dir, directory, dir)):
            path = os.path.join(src_dir, directory, dir, file)
            print(path)
            img = cv.imread(path)
            if img.shape != (150, 150, 3):
                img = cv.resize(img, (150, 150))
            noisy_image = add_noise(img)
            dst_path = os.path.join(dst_dir, directory, dir, file)
            # make sure the destination directory exists before saving
            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
            #cv.imwrite(dst_path, noisy_image)
            dst_path2 = dst_path[:-4]  # strip the original extension
            imsave(dst_path2 + '.tif', noisy_image)
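# The imported add_noise helper lives in add_noise.py, which is not shown
# here. A minimal sketch under the assumption (not confirmed by this file)
# that it applies additive Gaussian noise and clips back to the uint8 range:
def add_noise_sketch(img, sigma=15):
    """Hypothetical stand-in: add zero-mean Gaussian noise to a uint8 image."""
    noise = np.random.normal(0, sigma, img.shape)
    noisy = img.astype(np.float64) + noise
    return np.clip(noisy, 0, 255).astype(np.uint8)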