def cuml_like(arr1, arr2):
    """Blend the running total of ``arr1`` with the remaining total of ``arr2``.

    Both inputs are flattened to a single column and min-max scaled
    independently. Entry ``n`` of the result equals
    ``scaled1[:n + 1].sum() + scaled2[n + 1:].sum()``.

    Parameters
    ----------
    arr1, arr2 : numpy.ndarray
        Arrays holding the same number of elements.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(arr1.size, 1)``.

    Raises
    ------
    ValueError
        If the two arrays do not have the same number of elements.
    """
    # Validate up front: the reshape below never changes the element count,
    # so there is no reason to scale both arrays before rejecting them.
    if arr1.size != arr2.size:
        raise ValueError('must be equal-sized arrays arr1 and arr2')

    arr1 = MinMaxScaler().fit_transform(arr1.astype(float).reshape(-1, 1))
    arr2 = MinMaxScaler().fit_transform(arr2.astype(float).reshape(-1, 1))

    # Inclusive prefix sums of arr1: head[n] == arr1[:n + 1].sum().
    head = np.cumsum(arr1, axis=0)

    # Exclusive suffix sums of arr2: tail[n] == arr2[n + 1:].sum().
    # Accumulate from the end (skipping element 0) and flip back; the last
    # entry has nothing after it and stays 0.
    tail = np.zeros_like(arr2)
    tail[:-1] = np.cumsum(arr2[:0:-1], axis=0)[::-1]

    return head + tail
def cuml_like(arr1, arr2):
    """Blend the running total of ``arr1`` with the remaining total of ``arr2``.

    Both inputs are flattened to a single column and min-max scaled
    independently. Entry ``n`` of the result equals
    ``scaled1[:n + 1].sum() + scaled2[n + 1:].sum()``.

    Parameters
    ----------
    arr1, arr2 : numpy.ndarray
        Arrays holding the same number of elements.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(arr1.size, 1)``.

    Raises
    ------
    ValueError
        If the two arrays do not have the same number of elements.
    """
    # Validate up front: the reshape below never changes the element count,
    # so there is no reason to scale both arrays before rejecting them.
    if arr1.size != arr2.size:
        raise ValueError('must be equal-sized arrays arr1 and arr2')

    arr1 = MinMaxScaler().fit_transform(arr1.astype(float).reshape(-1, 1))
    arr2 = MinMaxScaler().fit_transform(arr2.astype(float).reshape(-1, 1))

    # Inclusive prefix sums of arr1: head[n] == arr1[:n + 1].sum().
    head = np.cumsum(arr1, axis=0)

    # Exclusive suffix sums of arr2: tail[n] == arr2[n + 1:].sum().
    # Accumulate from the end (skipping element 0) and flip back; the last
    # entry has nothing after it and stays 0.
    tail = np.zeros_like(arr2)
    tail[:-1] = np.cumsum(arr2[:0:-1], axis=0)[::-1]

    return head + tail
def make_data(n_samples=1000, n_features=1, n_targets=1, informative_prop=1.0,
              noise=0.0, test_prop=0.1, valid_prop=0.3, method='linear'):
    """Build a min-max scaled regression dataset split into train/valid/test.

    Parameters
    ----------
    n_samples, n_features, n_targets : int
        Size of the generated problem. Only ``method='linear'`` honours all
        three; the other sources fix their own dimensions.
    informative_prop : float
        Fraction of features that are informative (``'linear'`` only).
    noise : float
        Noise level passed to the synthetic generators.
    test_prop, valid_prop : float
        Proportions forwarded to ``train_valid_test_split``.
    method : str
        ``'linear'`` uses ``make_regression``, ``'boston'`` uses
        ``load_boston``; anything else falls back to ``make_friedman3``.

    Returns
    -------
    dict
        Theano shared variables for each split (``X_train`` ... ``Y_test``),
        the number of examples per split, and ``input_dim`` / ``output_dim``.
    """
    if method == 'linear':
        params = dict(n_features=n_features,
                      n_informative=int(n_features * informative_prop),
                      noise=noise,
                      n_targets=n_targets,
                      n_samples=n_samples,
                      shuffle=False,
                      bias=0.0)
        X, Y = make_regression(**params)
    elif method == 'boston':
        boston = load_boston()
        X = boston.data
        Y = boston.target
    else:
        # make_friedman3 has a fixed number of input features and does not
        # accept ``n_features``; passing it raised a TypeError.
        X, Y = make_friedman3(n_samples=n_samples, noise=noise)

    # Promote 1-D arrays to a single column *before* scaling: the scaler
    # works on 2-D (samples, features) input.
    if len(X.shape) == 1:
        X = X.reshape(X.shape[0], -1)
    if len(Y.shape) == 1:
        Y = Y.reshape(Y.shape[0], -1)

    X = MinMaxScaler(feature_range=(0.0, 1.0)).fit_transform(X)
    X = X.astype(theano.config.floatX)
    Y = MinMaxScaler(feature_range=(0.0, 1.0)).fit_transform(Y)
    Y = Y.astype(theano.config.floatX)

    # Report the dimensions actually present in the data, which may differ
    # from the requested ones for the non-'linear' sources.
    n_features = X.shape[1]
    n_targets = Y.shape[1]

    # The test split previously reused ``valid_prop``, silently ignoring the
    # caller's ``test_prop``.
    X_train, Y_train, X_valid, Y_valid, X_test, Y_test = \
        train_valid_test_split(X, Y, test_prop=test_prop, valid_prop=valid_prop)

    return dict(
        X_train=theano.shared(X_train),
        Y_train=theano.shared(Y_train),
        X_valid=theano.shared(X_valid),
        Y_valid=theano.shared(Y_valid),
        X_test=theano.shared(X_test),
        Y_test=theano.shared(Y_test),
        num_examples_train=X_train.shape[0],
        num_examples_valid=X_valid.shape[0],
        num_examples_test=X_test.shape[0],
        input_dim=n_features,
        output_dim=n_targets)
def normalize_to(data, to_low, to_high):
    """
    Rescale values into ``[to_low, to_high]`` and cast them to integers.

    Parameters
    ----------
    data: list[float]
        Values to rescale; treated as one flat feature column.
    to_low: int
        Lower bound of the target range
    to_high: int
        Upper bound of the target range

    Returns
    -------
    `numpy.ndarray`
        1-D array of ``int32`` values
    """
    # The scaler expects a 2-D (samples, features) array: use one column.
    column = np.array(data).reshape(-1, 1)

    scaler = MinMaxScaler(feature_range=(to_low, to_high))
    rescaled = scaler.fit_transform(column)

    # Flatten back to 1-D, then cast to int32.
    return rescaled.ravel().astype(np.int32)
def crops_from_trial(X, y, crop_len, stride=0, time_last=True, dummy_idx=0, normalize=True):
    """Cut one trial into fixed-length crops along the first axis.

    Parameters
    ----------
    X : numpy.ndarray
        Trial data; the first axis is the one that gets cropped.
    y : numpy.ndarray
        Targets aligned with the first axis of ``X``.
    crop_len : int
        Number of samples per crop (cast with ``int``).
    stride : int or float
        Step between crop starts. Values ``<= 0`` produce back-to-back,
        non-overlapping crops.
    time_last : bool
        If true, each ``X`` crop is transposed before the dummy axis is added.
    dummy_idx : int
        Position of the singleton axis inserted into every ``X`` crop.
    normalize : bool
        If true, each ``y`` crop is min-max scaled to ``[-1, 1]`` and each
        ``X`` crop goes through ``exponential_running_standardize``.

    Returns
    -------
    tuple[list, list]
        ``(x_list, y_list)`` of ``float32`` arrays, one entry per crop. Both
        lists are empty when the trial is shorter than ``crop_len``.
    """
    crop_len = int(crop_len)
    n_samples = X.shape[0]
    x_list, y_list = [], []

    # A trial shorter than one crop yields nothing. Without this guard the
    # strided count below rounds a small negative quotient toward zero and
    # produced a single, truncated crop.
    if n_samples < crop_len:
        return x_list, y_list

    if stride > 0:
        step = stride
        num_valid_crops = int((n_samples - crop_len) / stride) + 1
    else:
        step = crop_len
        num_valid_crops = int(n_samples // crop_len)

    for crop in range(num_valid_crops):
        start = int(crop * step)
        stop = start + crop_len
        x_crop = X[start:stop]
        y_crop = y[start:stop]

        if normalize:
            y_crop = MinMaxScaler(feature_range=(-1, 1)).fit_transform(
                y_crop.reshape(-1, 1)).squeeze()
            x_crop = exponential_running_standardize(
                x_crop, init_block_size=250, factor_new=0.001, eps=1e-4)

        oriented = x_crop.T if time_last else x_crop
        x_list.append(np.expand_dims(oriented, axis=dummy_idx).astype(np.float32))
        y_list.append(y_crop.astype(np.float32))

    return x_list, y_list
def prepare_data(df, n, step, test_size=0.3):
    """Turn a price/volume frame into windowed samples with up/down labels.

    Every column except ``volume`` is converted to a percentage change;
    ``volume`` becomes a log difference. Windows located by ``get_idx`` are
    min-max scaled, multiplied by 255 and cast to integers. The label of a
    window is 1.0 when ``close`` did not fall between the ``y_start`` and
    ``y_end`` rows, else 0.0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``volume`` and ``close`` columns.
    n, step
        Forwarded unchanged to ``get_idx`` for every window index.
    test_size : float
        Forwarded to ``train_test_split``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``; the ``X`` arrays carry a
        trailing singleton axis and the labels are floats.
    """
    changes = df.drop('volume', axis=1).pct_change()
    changes['volume'] = np.log(df.volume) - np.log(df.volume.shift(1))
    changes = changes.dropna(how='all')

    # Skip the first price row so positions line up with ``changes``.
    prices = df.iloc[1:, :]
    last_row = changes.shape[0] - 1

    windows, moves = [], []
    i = 0
    x_start, x_end, y_start, y_end = get_idx(i, n, step)
    while y_end <= last_row:
        window = changes.iloc[x_start:x_end, :].values
        window = MinMaxScaler().fit_transform(window) * 255
        windows.append(window.astype('int'))

        close_start = prices.iloc[y_start, :].close
        close_end = prices.iloc[y_end, :].close
        moves.append((close_end - close_start) / close_start)

        i += 1
        x_start, x_end, y_start, y_end = get_idx(i, n, step)

    X_train, X_test, y_train, y_test = train_test_split(
        windows, moves, test_size=test_size)

    X_train = np.expand_dims(X_train, -1)
    X_test = np.expand_dims(X_test, -1)
    went_up_train = np.array(y_train) >= 0
    went_up_test = np.array(y_test) >= 0
    return X_train, X_test, went_up_train * 1.0, went_up_test * 1.0
def load_mice(one_hot=False):
    """Load ``datasets/Data_Cortex_Nuclear.csv`` as an 80/20 train/test split.

    Columns 1-77 are read as features and columns 78-80 as three class
    descriptors. Missing features are replaced by the mean of the observed
    values in rows with identical descriptors. Features are then min-max
    scaled to ``[0, 1]`` and the rows shuffled with ``np.random``.

    Parameters
    ----------
    one_hot : bool
        If true, labels are one-hot rows; otherwise they are the integer
        class codes. Both are returned as ``float32``.

    Returns
    -------
    tuple
        ``((X_train, Y_train), (X_test, Y_test))`` with the first four fifths
        of the shuffled rows used for training.
    """
    filling_value = -100000
    path = 'datasets/Data_Cortex_Nuclear.csv'
    X = np.genfromtxt(path, delimiter=',', skip_header=1,
                      usecols=range(1, 78), filling_values=filling_value,
                      encoding='UTF-8')
    classes = np.genfromtxt(path, delimiter=',', skip_header=1,
                            usecols=range(78, 81), dtype=None,
                            encoding='UTF-8')
    n_rows = classes.shape[0]

    # Impute from *observed* values only. Previously the average included
    # the sentinel itself (the cell being filled, plus any other cells of
    # the class that were still missing), dragging results far negative.
    missing = X == filling_value
    for i in np.flatnonzero(missing.any(axis=1)):
        same_class = np.array(
            [np.all(classes[i] == classes[k]) for k in range(n_rows)],
            dtype=bool)
        for j in np.flatnonzero(missing[i]):
            observed = ~missing[:, j]
            donors = same_class & observed
            if not donors.any():
                # No observed value within the class: fall back to every
                # observed value of the column.
                donors = observed
            X[i, j] = X[donors, j].mean()

    # Encode the three descriptors as a 3-bit code: bit j is set when
    # descriptor j equals the reference label for that position.
    DY = np.zeros(n_rows, dtype=np.uint8)
    for i, row in enumerate(classes):
        for j, (val, label) in enumerate(zip(row, ['Control', 'Memantine', 'C/S'])):
            DY[i] += (2**j) * (val == label)

    # Index one-hot columns by each code's rank among the codes present, so
    # a non-contiguous code set cannot index past the last column. This is
    # identical to indexing by the code when codes are 0..k-1.
    codes, columns = np.unique(DY, return_inverse=True)
    Y = np.zeros((DY.shape[0], codes.shape[0]))
    Y[np.arange(DY.shape[0]), columns.ravel()] = 1

    X = MinMaxScaler(feature_range=(0, 1)).fit_transform(X)

    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    X = X[indices]
    Y = Y[indices]
    DY = DY[indices]
    classes = classes[indices]

    if not one_hot:
        Y = DY

    X = X.astype(np.float32)
    Y = Y.astype(np.float32)
    print(X.shape, Y.shape)

    split = X.shape[0] * 4 // 5
    return (X[:split], Y[:split]), (X[split:], Y[split:])
def build_dataset(X, y, labels, test_prop=0.2, valid_prop=0.2, register='both', test=False):
    """Assemble train/valid/test splits as Theano shared variables.

    Parameters
    ----------
    X : array
        Feature matrix, one row per example.
    y : array
        Integer label indices referring to rows of ``labels``.
    labels : array
        2-D array; column 0 is read as the phone and column 1 as the register.
    test_prop, valid_prop : float
        Proportions forwarded to ``train_valid_test_split``.
    register : str
        ``'IDS'`` or ``'ADS'`` keeps only examples of that register;
        ``'both'`` keeps everything and merges labels that share a phone.
    test : bool
        If true, the splits are cut down to at most 100 / 10 / 50 examples.

    Returns
    -------
    dict
        Shared variables per split, example counts, ``input_dim``,
        ``output_dim`` and the ``labels`` array that was passed in.

    Raises
    ------
    ValueError
        If ``register`` is not one of the three accepted values.
    """
    if register not in ('IDS', 'ADS', 'both'):
        raise ValueError('invalid option for register: {0}'.format(register))

    if register == 'both':
        # Merge the per-register labels of each phone into one class index.
        ix2phone = {ix: phone for ix, phone in enumerate(labels[:, 0])}
        phones = sorted(set(ix2phone.values()))
        phone2newix = dict(zip(phones, range(len(phones))))
        y = np.array([phone2newix[ix2phone[i]] for i in y])
    else:
        wanted = np.nonzero(labels[:, 1] == register)
        sel_ixs = np.in1d(y, wanted)
        X = X[sel_ixs]
        y = y[sel_ixs]

    # Renumber the surviving labels densely as 0..k-1.
    present = np.unique(y)
    oldix2newix = dict(zip(present, range(len(present))))
    y = np.array([oldix2newix[i] for i in y])

    X = MinMaxScaler(feature_range=(0, 1)).fit_transform(X)
    X = X.astype(theano.config.floatX)
    y = y.astype('int32')

    nclasses = np.unique(y).shape[0]
    nfeatures = X.shape[1]

    X_train, y_train, X_valid, y_valid, X_test, y_test = \
        train_valid_test_split(X, y, test_prop=test_prop, valid_prop=valid_prop)

    if test:
        # NOTE(review): these two slices are assigned but never read again.
        X = X_train[100:200]
        y = y_train[100:200]
        X_train, y_train = X_train[:100], y_train[:100]
        X_valid, y_valid = X_valid[:10], y_valid[:10]
        X_test, y_test = X_test[:50], y_test[:50]

    return {
        'X_train': theano.shared(X_train),
        'y_train': theano.shared(y_train),
        'X_valid': theano.shared(X_valid),
        'y_valid': theano.shared(y_valid),
        'X_test': theano.shared(X_test),
        'y_test': theano.shared(y_test),
        'num_examples_train': X_train.shape[0],
        'num_examples_valid': X_valid.shape[0],
        'num_examples_test': X_test.shape[0],
        'input_dim': nfeatures,
        'output_dim': nclasses,
        'labels': labels,
    }
def load_real_train_data():
    """
    Load the training images and rescale them to ``[-1, 1]``.

    :return: float32 numpy array of real MNIST images with a trailing
        singleton axis
    """
    # Only the training images are needed; labels and the second split
    # returned by the loader are discarded.
    (images, _), _ = load_data()

    n_images, height, width = images.shape

    # The scaler works on 2-D input, so flatten each image to one row,
    # scale, then restore the original image shape.
    flat = images.reshape(n_images, height * width)
    scaled = MinMaxScaler((-1, 1)).fit_transform(flat)
    restored = scaled.reshape(n_images, height, width)

    return np.expand_dims(restored.astype('float32'), axis=-1)
def __getitem__(self, idx):
    """Return the min-max scaled dB mel spectrogram of sample ``idx``.

    Reads ``self.nfft``, ``self.hop_len``, ``self.mels`` and
    ``self.truncate``.

    Returns
    -------
    tuple
        ``([spectrogram], label)`` where ``spectrogram`` is a ``float32``
        array of shape ``(1, n_rows, n_cols)`` scaled over all of its
        values at once.
    """
    # read sound samples from file
    sound_samples, sampling_rate, label = Sound.read_sound(self, idx)

    mel = librosa.feature.melspectrogram(y=sound_samples, sr=sampling_rate,
                                         n_fft=self.nfft,
                                         hop_length=self.hop_len,
                                         n_mels=self.mels)
    # ``ref`` is passed by keyword: recent librosa releases make it
    # keyword-only, and older releases accept the keyword form as well.
    mel = librosa.power_to_db(mel, ref=np.max)

    if self.truncate:
        # presumably resizes both axes to a power of two; verify against
        # adjust_matrix / closest_power_2
        mel = adjust_matrix(mel,
                            2**closest_power_2(mel.shape[0]),
                            2**closest_power_2(mel.shape[1]))

    initial_shape = mel.shape
    # Flatten to one column so a single min/max covers the whole
    # spectrogram, then restore the shape with a leading singleton axis.
    mel_scaled_spectrogram_db = MinMaxScaler().fit_transform(
        mel.reshape(-1, 1)).reshape((1, *initial_shape))
    mel_scaled_spectrogram_db = mel_scaled_spectrogram_db.astype(np.float32)

    return [mel_scaled_spectrogram_db], label
def grid_search(file):
    """Grid-search a balanced, L2-penalised logistic regression on a TSV file.

    The first column is ignored, the last column is the integer label and
    every column in between is a float feature. Features are min-max scaled
    to ``[0, 1]`` before a 5-fold search scored by ROC AUC.

    Parameters
    ----------
    file : str or file-like
        Tab-separated table with a header row.

    Returns
    -------
    tuple
        ``(best_params, df)``: the best parameter combination found and the
        loaded frame with its feature/label columns cast.
    """
    df = pd.read_table(file, lineterminator='\n', sep='\t')
    col = list(df.columns.values)
    feature_cols, label_col = col[1:-1], col[-1]

    df[feature_cols] = df[feature_cols].astype(float)
    df[label_col] = df[label_col].astype(int)

    # ``.values`` replaces ``DataFrame.as_matrix()``, which no longer exists
    # in current pandas; ``.values`` is available in old and new releases.
    X = df[feature_cols].values
    X = MinMaxScaler(feature_range=(0.0, 1.0)).fit_transform(X)
    X = X.astype(float, order='C')

    Y = df[label_col].values
    Y = Y.astype(float, order='C')

    parameters = {
        'C': [0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0, 100000.0],
        'solver': ['lbfgs'],
        'max_iter': [100, 250, 500, 750, 1000, 1500, 2500],
    }

    lr = log_reg(penalty='l2', class_weight='balanced')
    model = GridSearchCV(lr, parameters, cv=5, scoring='roc_auc', n_jobs=5)
    model.fit(X, Y)

    return model.best_params_, df
def get_item(self, idx):
    """
    Compute the periodogram of sample ``idx``.

    Reads ``self.scale_db``, ``self.slice_freq`` and ``self.scale``.
    Returns ``((periodogram, frequencies), labels)`` with the periodogram
    as ``float32``; the zero-frequency bin is dropped from both arrays.
    """
    # read sound samples from file
    sound_samples, sampling_rate, labels = Sound.read_sound(self, idx=idx, raw=True)

    # Magnitude spectrum with an FFT length equal to the sampling rate.
    spectrum = np.fft.rfft(sound_samples, sampling_rate)
    periodogram = abs(spectrum)[1:]

    if self.scale_db:
        # Express magnitudes in dB relative to the largest value the
        # sample's integer type can hold.
        full_scale = np.iinfo(sound_samples[0]).max
        periodogram = 20 * np.log10(periodogram / full_scale)

    frequencies = np.fft.rfftfreq(sampling_rate, d=(1. / sampling_rate))[1:]

    if self.slice_freq:
        band = slice(self.slice_freq[0], self.slice_freq[1])
        periodogram = periodogram[band]
        frequencies = frequencies[band]

    if self.scale:
        column = periodogram.reshape(-1, 1)
        periodogram = MinMaxScaler().fit_transform(column).squeeze()

    return (periodogram.astype(np.float32), frequencies), labels
# Script fragment: threshold the wavelet coefficients, rebuild the signal,
# plot raw vs. rebuilt data, then run the rebuilt signal through a
# TFLite-style interpreter. `cD3`, `coeffs`, `threshold`, `data` and
# `interpreter` are defined outside this fragment.

# Zero cD3 in place (presumably one of the detail bands in `coeffs` - confirm).
cD3.fill(0)
# Threshold every coefficient array from index 1 up to, but excluding, the
# last three.
for i in range(1, len(coeffs)-3):
    coeffs[i]=pywt.threshold(coeffs[i], threshold)
# Reconstruct the signal from the modified coefficients.
rdata = pywt.waverec(coeffs=coeffs, wavelet='db5')
print("="*30)
print("showing your ecgdata")
plt.figure(figsize=(20,4))
# NOTE(review): the grid is declared with 3 rows but only 2 panels are drawn.
plt.subplot(3,1,1)
plt.plot(data)
plt.title("raw data")
plt.subplot(3,1,2)
plt.plot(rdata)
plt.title("new data")
# NOTE(review): hard-coded absolute Windows path.
plt.savefig('D:\\anaconda3\\envs\\myTensorflow\\ECG\\Tang\\ecgtest3.png')
plt.show()
print("="*30)
print("analysing your ecgdata using ECGNet")
# The reshape requires the rebuilt signal to hold exactly 5000 values.
tt = np.array(rdata).reshape((5000,1))
# Scale the single column to [0, 1].
tt = MinMaxScaler(feature_range=(0,1)).fit_transform(tt)
interpreter.allocate_tensors()
inputIndex=interpreter.get_input_details()[0]["index"]
outputIndex = interpreter.get_output_details()[0]["index"]
# Shape the input as (batch, 5000, 1, 1) float32 before feeding it in.
tt = tt.reshape((-1,5000,1,1))
tt = tt.astype(np.float32)
interpreter.set_tensor(inputIndex, tt)
interpreter.invoke()
prediction = interpreter.get_tensor(outputIndex)[0]
print(prediction)
# NOTE(review): this message is printed unconditionally; it does not depend
# on `prediction`.
print("so far,you are healthy. keep exercising and stay fit.")
layers[i+1] = tf.matmul(layers[i],w)+b # create phases mid_idx = int((len(w_dict)-1) / 2) n_phases = mid_idx phase_training_ops = [] for phase_idx in range(n_phases): pass if __name__=='__main__': tf.reset_default_graph() from keras.datasets.mnist import load_data (xtrain,xtest),(ytrain,ytest) = load_data() from sklearn.preprocessing import StandardScaler, MinMaxScaler xtrain = MinMaxScaler((0,1)).fit_transform(xtrain.astype('float64').reshape(xtrain.shape[0],-1)) h_layers = [300, 150, 300] act_fn = 'elu' lr = 1e-2 l2_pen = 1e-4 sae = tfStackedAutoEncoder(h_layers=h_layers) layers = sae.fit(xtrain)
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# NOTE(review): `sklearn.cross_validation` only exists in old scikit-learn
# releases; newer ones provide `train_test_split` in
# `sklearn.model_selection` - confirm the pinned version.
from sklearn.cross_validation import train_test_split
import theanets
import climate

climate.enable_default_logging()

# NOTE(review): hard-coded absolute path to a user's home directory.
X_orig = np.load('/Users/bzamecnik/Documents/music-processing/music-processing-experiments/c-scale-piano_spectrogram_2048_hamming.npy')
sample_count, feature_count = X_orig.shape
# Scale every feature column to [0, 1] and store as float32.
X = MinMaxScaler().fit_transform(X_orig)
X = X.astype(np.float32)
# 60 % train, then the remaining 40 % is halved into validation and test.
X_train, X_test = train_test_split(X, test_size=0.4, random_state=42)
X_val, X_test = train_test_split(X_test, test_size=0.5, random_state=42)

# (np.maximum(0, 44100/512*np.arange(13)-2)).astype('int')
#blocks = [0, 84, 170, 256, 342, 428, 514, 600, 687, 773, 859, 945, 1031, 1205]
# Boundaries of consecutive label blocks, as row indices.
blocks = [0, 48, 98, 148, 198, 248, 298, 348, 398, 448, 498, 548, 598, 700]

def make_labels(blocks):
    """Return one label per row, constant within each block.

    ``blocks`` lists ascending boundaries; rows in
    ``blocks[i]:blocks[i+1]`` get label ``i``. The result is a float array
    of length ``blocks[-1]``.
    """
    label_count = len(blocks) - 1
    labels = np.zeros(blocks[-1])
    for i in range(label_count):
        labels[blocks[i]:blocks[i+1]] = i
    return labels

y = make_labels(blocks)

# NOTE(review): only the first statement of `score` is visible here; the
# rest of its body may lie outside this view.
def score(exp, Xs):
    X_train, X_val, X_test = Xs
def vad(data, fs, fs_vad=16000, hop_length=30, vad_mode=0):
    """ Voice activity detection.
    This was implemented for easier use of py-webrtcvad.

    Parameters
    ----------
    data : ndarray
        numpy array of mono (1 ch) speech data.
        1-d or 2-d, if 2-d, shape must be (1, time_length) or (time_length, 1).
        if data type is int, -32768 <= data <= 32767.
        if data type is float, data is expected within -1 <= data <= 1;
        data exceeding that range is divided by its peak magnitude.
    fs : int
        Sampling frequency of data.
    fs_vad : int, optional
        Sampling frequency for webrtcvad.
        fs_vad must be 8000, 16000, 32000 or 48000.
        Default is 16000.
    hop_length : int, optional
        Step size[milli second].
        hop_length must be 10, 20, or 30.
        Default is 30.
    vad_mode : int, optional
        set vad aggressiveness.
        As vad_mode increases, it becomes more aggressive.
        vad_mode must be 0, 1, 2 or 3.
        Default is 0.

    Returns
    -------
    vact : ndarray
        voice activity. time length of vact is same as input data.
        If 0, it is unvoiced, 1 is voiced.

    Raises
    ------
    ValueError
        If an argument is outside its allowed set, the dtype is neither int
        nor float, int data exceeds the 16-bit range, or data is not mono.
    """
    # check argument
    if fs_vad not in [8000, 16000, 32000, 48000]:
        raise ValueError('fs_vad must be 8000, 16000, 32000 or 48000.')

    if hop_length not in [10, 20, 30]:
        raise ValueError('hop_length must be 10, 20, or 30.')

    if vad_mode not in [0, 1, 2, 3]:
        raise ValueError('vad_mode must be 0, 1, 2 or 3.')

    # check data
    if data.dtype.kind == 'i':
        if data.max() > 2**15 - 1 or data.min() < -2**15:
            raise ValueError(
                'When data.type is int, data must be -32768 < data < 32767.')
        data = data.astype('f') / 2.0**15

    elif data.dtype.kind == 'f':
        # After librosa.load() the peak can be slightly greater than 1.0.
        # Divide by the peak instead of min-max scaling: that keeps silence
        # at zero (no DC offset) and preserves the array shape, so the mono
        # check below still sees multi-channel input.
        peak = np.abs(data).max()
        if peak > 1:
            data = data / peak
        data = data.astype('f')

    else:
        raise ValueError('data.dtype must be int or float.')

    data = data.squeeze()
    if data.ndim != 1:
        raise ValueError('data must be mono (1 ch).')

    # resampling
    if fs != fs_vad:
        resampled = resample(data, fs, fs_vad)
        if np.abs(resampled).max() > 1.0:
            resampled *= (0.99 / np.abs(resampled).max())
            warn('Resampling causes data clipping. data was rescaled.')
    else:
        resampled = data

    # webrtcvad consumes 16-bit PCM.
    resampled = (resampled * 2.0**15).astype('int16')

    # Zero-pad so the signal splits into whole frames of `hop` samples.
    hop = fs_vad * hop_length // 1000
    framelen = resampled.size // hop + 1
    padlen = framelen * hop - resampled.size
    paded = np.pad(resampled, (0, padlen), 'constant', constant_values=0)
    framed = frame(paded, frame_length=hop, hop_length=hop).T

    detector = webrtcvad.Vad()
    detector.set_mode(vad_mode)
    valist = [detector.is_speech(tmp.tobytes(), fs_vad) for tmp in framed]

    # Expand every per-frame decision to the frame length at the original
    # sampling rate.
    hop_origin = fs * hop_length // 1000
    va_framed = np.zeros([len(valist), hop_origin])
    va_framed[np.asarray(valist, dtype=bool)] = 1
    vact = va_framed.reshape(-1)[:data.size]

    # `hop_origin` is rounded down, so on long inputs the expanded decisions
    # can fall short of the input length; repeat the last decision to keep
    # the documented "same length as input" contract.
    if 0 < vact.size < data.size:
        vact = np.pad(vact, (0, data.size - vact.size), 'edge')

    return vact