def load_scale_param(self, path):
    x_scaler, y_scaler = None, None
    if self.kind == KindNormalization.Zscore:
        x_scaler = StandardScaler()
        y_scaler = StandardScaler()

        x_scale = np.load(path + '/x_scaler_scale.npy')
        x_mean = np.load(path + '/x_scaler_mean.npy')
        x_var = np.load(path + '/x_scaler_var.npy')

        y_scale = np.load(path + '/y_scaler_scale.npy')
        y_mean = np.load(path + '/y_scaler_mean.npy')
        y_var = np.load(path + '/y_scaler_var.npy')

        x_scaler.scale_ = x_scale
        x_scaler.mean_ = x_mean
        x_scaler.var_ = x_var

        y_scaler.scale_ = y_scale
        y_scaler.mean_ = y_mean
        y_scaler.var_ = y_var
    elif self.kind == KindNormalization.Scaling:
        x_scaler = load(open(path + '/x_scaler_minmax.pkl', 'rb'))
        y_scaler = load(open(path + '/y_scaler_minmax.pkl', 'rb'))
    else:
        print('[Error] there is a problem loading distributions %s.' % self.kind)

    return x_scaler, y_scaler
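# A possible companion writer for load_scale_param above. This is only a sketch,
# assuming the statistics were saved with np.save (z-score case) and a pickle dump
# (min-max case) under the same file names the loader reads back; save_scale_param
# itself is a hypothetical name, and the pickle import is assumed.
def save_scale_param(self, path, x_scaler, y_scaler):
    if self.kind == KindNormalization.Zscore:
        # persist the fitted z-score statistics as plain .npy arrays
        np.save(path + '/x_scaler_scale.npy', x_scaler.scale_)
        np.save(path + '/x_scaler_mean.npy', x_scaler.mean_)
        np.save(path + '/x_scaler_var.npy', x_scaler.var_)
        np.save(path + '/y_scaler_scale.npy', y_scaler.scale_)
        np.save(path + '/y_scaler_mean.npy', y_scaler.mean_)
        np.save(path + '/y_scaler_var.npy', y_scaler.var_)
    elif self.kind == KindNormalization.Scaling:
        # persist the whole MinMaxScaler objects, matching the pickle load above
        with open(path + '/x_scaler_minmax.pkl', 'wb') as f:
            pickle.dump(x_scaler, f)
        with open(path + '/y_scaler_minmax.pkl', 'wb') as f:
            pickle.dump(y_scaler, f)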
def _create_scaler(self):
    scaler = StandardScaler()
    scaler.mean_ = np.array([1.22685548e+08, 6.15609944e+05, 2.55416063e+06])
    scaler.scale_ = np.array([2.94523441e+08, 1.39027033e+06, 6.15530731e+06])
    return scaler
def normalize():
    """Normalize mel spectrogram with pre-computed statistics."""
    config = parse_and_config()

    if config["format"] == "npy":
        # init scaler with saved values
        scaler = StandardScaler()
        scaler.mean_, scaler.scale_ = np.load(
            os.path.join(config["outdir"], "stats.npy"))
        scaler.n_features_in_ = config["num_mels"]
    else:
        raise ValueError("'npy' is the only supported format.")

    # find all "raw-feats" files in both train and valid folders
    glob_path = os.path.join(config["rootdir"], "**", "raw-feats", "*.npy")
    mel_raw_feats = glob.glob(glob_path, recursive=True)
    logging.info(f"Files to normalize: {len(mel_raw_feats)}")

    # check for output directories
    os.makedirs(os.path.join(config["outdir"], "train", "norm-feats"), exist_ok=True)
    os.makedirs(os.path.join(config["outdir"], "valid", "norm-feats"), exist_ok=True)

    p = Pool(config["n_cpus"])
    partial_fn = partial(gen_normal_mel, scaler=scaler, config=config)
    list(p.map(partial_fn, tqdm(mel_raw_feats, desc="[Normalizing]")))
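# The Pool call above maps a gen_normal_mel worker over the raw feature files.
# The sketch below is an assumption about what that worker does (the real
# gen_normal_mel may differ): load one raw mel file, standardize it with the
# restored scaler, and write it into the matching "norm-feats" folder.
def gen_normal_mel(mel_path, scaler, config):
    mel = np.load(mel_path)
    mel_norm = scaler.transform(mel)
    subset = "train" if os.sep + "train" + os.sep in mel_path else "valid"
    out_path = os.path.join(config["outdir"], subset, "norm-feats",
                            os.path.basename(mel_path))
    np.save(out_path, mel_norm.astype(np.float32))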
def load_augmented_dataset(cfg, dset, augmentation_data_dir, keep_dims=None, standardize=False):
    save_dir = os.path.join(WORK_DIR, cfg['experiment_name'], cfg['dataset'], 'data_cache')
    os.makedirs(save_dir, exist_ok=True)
    save_name = os.path.join(save_dir, get_kaldi_cache_name(cfg, dset))
    scaler_name = os.path.join(
        save_dir, get_kaldi_cache_name(cfg, dset=None).split('.')[0] + '_scaler.pkl')

    dataset = AugmentedSpeechDataset(cfg=cfg, dset=dset,
                                     augmentation_data_dir=augmentation_data_dir,
                                     keep_dims=keep_dims)

    if not os.path.isfile(scaler_name):
        print('Saving scaler to cache...')
        if dset == 'train':
            # Standardizer
            scaler = StandardScaler(copy=False)
            mean_ = dataset.feats.mean(0)
            std_ = dataset.feats.std(0)
            scaler.mean_ = mean_
            scaler.scale_ = std_
            with open(scaler_name, 'wb') as f:
                pickle.dump(scaler, f)

    if standardize and not os.path.isfile(scaler_name):
        print('Standardize selected but there is no scaler!')
        raise FileNotFoundError

    if standardize:
        with open(scaler_name, 'rb') as f:
            scaler = pickle.load(f)
        dataset.feats = scaler.transform(dataset.feats)

    return dataset
def post_process(insurance_input, sample_vector):
    """
    :param insurance_input: InsuranceInput
    :param sample_vector: SampleVector
    :return: SampleVector
    """
    if not isinstance(insurance_input, InsuranceInput):
        raise ValueError("insurance_input is not InsuranceInput")
    if not isinstance(sample_vector, SampleVector):
        raise ValueError("sample_vector is not SampleVector")

    feature_con = ['amount', 'num', 'sex_ratio']
    feature_log = ['amount', 'num']
    others = []
    for item in feature_con:
        if item in feature_log:
            other = math.log10(insurance_input.__dict__[item])
        else:
            other = insurance_input.__dict__[item]
        others.append(other)

    scaler = StandardScaler()
    # scaler.mean_ = np.array([6.19479067, 1.675198, 0.66043834])
    # scaler.scale_ = np.array([0.7849499, 0.52186272, 0.26229562])
    scaler.mean_ = np.array([6.193721, 1.675647, 0.66060])
    scaler.scale_ = np.array([0.785466, 0.522194, 0.26211])
    others = scaler.transform(np.array(others).reshape(1, -1)).reshape(3)

    sample_vector.values += list(others)
    return sample_vector
def get_generater_scalar(stats="../pretrained_model/stats.h5"):
    scalar = StandardScaler()
    scalar.mean_ = read_hdf5(stats, "mean")
    scalar.scale_ = read_hdf5(stats, "scale")
    scalar.n_features_in_ = scalar.mean_.shape[0]
    return scalar
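# Minimal usage sketch for get_generater_scalar, assuming the stats file exists;
# the dummy `mels` array is illustrative only.
scaler = get_generater_scalar("../pretrained_model/stats.h5")
mels = np.random.rand(100, scaler.n_features_in_)   # fake features of matching width
mels_norm = scaler.transform(mels)                   # standardize
mels_back = scaler.inverse_transform(mels_norm)      # round-trips back to `mels`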
def data_scaler(self, data):
    temp_scaler = StandardScaler()
    temp_scaler.mean_ = self.scaler.mean_
    temp_scaler.var_ = self.scaler.var_
    temp_scaler.n_samples_seen_ = self.scaler.n_samples_seen_
    temp_scaler.scale_ = self.scaler.scale_
    return temp_scaler.transform(data)
def switch_scaler(X, original, new=None):
    """
    Switch the scaler

    :param X: data scaled by original
    :type X: array or dataframe
    :param original: original scaler
    :type original: scaler
    :param new: new scaler
    :type new: scaler

    :return Xnew: new(inverse_original(X))
    :rtype Xnew: array or dataframe
    """
    # new is an identity transform (only do the inverse transform)
    if new is None:
        new = StandardScaler()
        new.mean_ = 0.0
        new.scale_ = 1.0

    # Invert the original transformation
    inverse = original.inverse_transform(X)

    if isinstance(X, pd.DataFrame):
        return pd.DataFrame(new.transform(inverse), index=X.index, columns=X.columns)
    else:
        return new.transform(inverse)
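# Usage sketch for switch_scaler with two independently fitted scalers; the data
# and scaler names here are illustrative, not from the original project.
rng = np.random.default_rng(0)
raw = rng.normal(size=(100, 3))
scaler_a = StandardScaler().fit(raw)
scaler_b = StandardScaler().fit(raw * 2.0 + 1.0)   # a differently fitted scaler

X_b = switch_scaler(scaler_a.transform(raw), original=scaler_a, new=scaler_b)  # re-express in scaler_b units
X_raw = switch_scaler(scaler_a.transform(raw), original=scaler_a)              # identity: just undo scaler_a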
def onefun():
    """
    Integrate and plot one ODE (using PredictorODE) and print important statistics.
    """
    step_sizes = [0.025, 0.029, 0.033, 0.039, 0.045, 0.052, 0.060, 0.070]
    dim_state = 6  # nodes per integration step
    d = 3  # dimension of the ODE state space
    dim_action = len(step_sizes)
    memory = 0  # how many integration steps the predictor can look back

    x0 = np.array([10.0, 10.0, 10.0])
    # x0 = np.random.rand(3) * 20 - 10
    # print(x0)

    scaler = StandardScaler()
    scaler.mean_ = np.zeros((dim_state * d + 1) * (memory + 1))
    scaler.mean_[0] = -0.045
    scaler.scale_ = 10 * np.ones((dim_state * d + 1) * (memory + 1))
    scaler.scale_[0] = 0.1
    # scaler.mean_ = np.zeros((dim_state * d + 1) * (memory + 1))
    # scaler.scale_ = np.ones((dim_state * d + 1) * (memory + 1))

    env = ODEEnv(fun=LorenzSystem(),
                 max_iterations=10000,
                 initial_step_size=0.025,
                 step_size_range=(step_sizes[0], step_sizes[-1]),
                 error_tol=0.0001,
                 nodes_per_integ=dim_state,
                 memory=memory,
                 x0=x0,
                 max_dist=100)

    predictor = PredictorQODE(step_sizes=step_sizes,
                              model=build_value_modelODE(dim_state=dim_state * d + 1,
                                                         dim_action=dim_action,
                                                         filename='predictorODE',
                                                         lr=0.01,
                                                         memory=memory),
                              scaler=scaler)
    # predictor = PredictorConstODE(0.05)
    # integrator = ClassicRungeKutta()
    integrator = RKDP()

    reward, num_evals = integrate_env(predictor, integrator, env, t1=2, plot=True)

    print("reward: {}".format(reward))
    print("nfev: {}".format(num_evals))
    print("mean error: {}".format(np.mean(env.errors)))
    print("min error, max error: {}, {}".format(
        *np.round((np.min(env.errors), np.max(env.errors)), 5)))
    print("min stepsize: {}".format(np.min(env.deltas)))
    print("max stepsize: {}".format(np.max(env.deltas)))
def standard_scaler(self):
    """Return a sklearn.preprocessing.StandardScaler"""
    s = StandardScaler()
    s.mean_ = self.mean()
    var = self.var()
    var[var <= 0] = 1  # ignore variables with zero variance
    s.scale_ = np.sqrt(var)  # transform() uses scale_; the old std_ attribute is gone
    return s
def generate_fn(args):
    device = torch.device("cuda" if hparams.use_cuda else "cpu")
    upsample_factor = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)

    model = create_model(hparams)

    checkpoint = torch.load(args.checkpoint, map_location=lambda storage, loc: storage)

    if torch.cuda.device_count() > 1:
        model.module.load_state_dict(checkpoint['model'])
    else:
        model.load_state_dict(checkpoint['model'])

    model.to(device)
    model.eval()

    if hparams.feature_type == "mcc":
        scaler = StandardScaler()
        scaler.mean_ = np.load(os.path.join(args.data_dir, 'mean.npy'))
        scaler.scale_ = np.load(os.path.join(args.data_dir, 'scale.npy'))
        feat_transform = transforms.Compose([lambda x: scaler.transform(x)])
    else:
        feat_transform = None

    with torch.no_grad():
        samples, local_condition, uv = prepare_data(args.lc_file, upsample_factor,
                                                    model.receptive_field,
                                                    read_fn=lambda x: np.load(x),
                                                    feat_transform=feat_transform)

        start = time.time()
        for i in tqdm(range(local_condition.size(-1) - model.receptive_field)):
            sample = torch.FloatTensor(
                np.array(samples[-model.receptive_field:]).reshape(1, -1, 1))
            h = local_condition[:, :, i + 1: i + 1 + model.receptive_field]
            sample, h = sample.to(device), h.to(device)
            output = model(sample, h)

            if hparams.feature_type == "mcc":
                if uv[i + model.receptive_field] == 0:
                    output = output[0, :, -1]
                    outprob = F.softmax(output, dim=0).cpu().numpy()
                    sample = np.random.choice(
                        np.arange(hparams.quantization_channels), p=outprob)
                else:
                    output = output[0, :, -1] * 2
                    outprob = F.softmax(output, dim=0).cpu().numpy()
                    sample = outprob.argmax(0)
            else:
                # I tested sampling, but it produces more noise,
                # so I use argmax this time.
                output = output[0, :, -1]
                outprob = F.softmax(output, dim=0).cpu().numpy()
                sample = outprob.argmax(0)

            sample = mu_law_decode(sample, hparams.quantization_channels)
            samples.append(sample)

        write_wav(np.asarray(samples), hparams.sample_rate,
                  os.path.join(os.path.dirname(args.checkpoint),
                               "generated-{}.wav".format(os.path.basename(args.checkpoint))))
def normalize_scale_with_params(channel_data: np.ndarray,
                                mean: float = None,
                                std: float = None) -> np.ndarray:
    tmp = channel_data.reshape((-1, 1), order='C')
    # with_mean/with_std are boolean flags; the statistics go into mean_/scale_
    scaler = StandardScaler(copy=False)
    scaler.mean_ = mean
    scaler.scale_ = std
    tmp = scaler.transform(tmp)
    std_channel_data = tmp.reshape(channel_data.shape, order='C')
    return std_channel_data
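# Quick sanity check for normalize_scale_with_params with made-up statistics:
# for the values 0..11 the population mean is 5.5 and the standard deviation is
# about 3.4521, so the output should be roughly zero-mean, unit-variance.
data = np.arange(12, dtype=np.float64).reshape(3, 4)
normed = normalize_scale_with_params(data, mean=5.5, std=3.4521)
print(normed.mean(), normed.std())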
def load_model(dir_name):
    model = pickle.load(open(dir_name + '/RF_model_.sav', 'rb'))
    ls_need_col = json.load(open(dir_name + '/ls_need_col', "r"))
    try:
        scaler = StandardScaler()
        scale_data = json.load(open(dir_name + '/scaler', "r"))
        scaler.mean_ = scale_data[0]
        scaler.scale_ = scale_data[1]
    except Exception:
        # fall back to no scaler if the stats file is missing or unreadable
        scaler = None
    return model, ls_need_col, scaler
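# A possible writer for the 'scaler' file load_model reads: a JSON list holding
# [mean, scale]. The function name and the JSON layout are assumptions inferred
# from the loader above.
def save_scaler(dir_name, scaler):
    with open(dir_name + '/scaler', 'w') as f:
        json.dump([scaler.mean_.tolist(), scaler.scale_.tolist()], f)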
def pareto_const_predictor():
    """
    Find performance (avg. error, avg. evals) of constant step size choice
    w.r.t. a function class.
    """
    num_samples = 1000
    step_sizes = [0.5, 0.6, 0.7, 0.8, 1.0, 1.2, 1.5]
    dim_state = 6  # nodes per integration step
    d = 2  # dimension of the ODE state space
    dim_action = len(step_sizes)
    memory = 0  # how many integration steps the predictor can look back

    x0 = np.array([1.0, 1.0])

    scaler = StandardScaler()
    # scaler.mean_ = np.zeros((dim_state * d + 1) * (memory + 1))
    # scaler.scale_ = 100 * np.ones((dim_state * d + 1) * (memory + 1))
    # scaler.scale_[0] = 0.1
    scaler.mean_ = np.zeros((dim_state * d + 1) * (memory + 1))
    scaler.scale_ = np.ones((dim_state * d + 1) * (memory + 1))

    env = ODEEnv(fun=Rotation(),
                 max_iterations=10000,
                 initial_step_size=0.6,
                 step_size_range=step_sizes,
                 error_tol=0.0001,
                 nodes_per_integ=dim_state,
                 memory=memory,
                 x0=x0,
                 max_dist=20)
    integrator = RKDP()

    paretos = []
    for action in range(len(step_sizes)):
        print('action: {}'.format(action))
        predictor = PredictorConstODE(action)
        errors = []
        steps = []
        t1s = []
        for i in range(num_samples):
            env.reset(integrator)
            integrate_env(predictor, integrator, env)
            errors.append(np.mean(env.errors))
            steps.append(env.evals)
            t1s.append(env.timesteps[-1])
        print(np.mean(steps))
        print(np.mean(errors))
        print('')
        paretos.append((np.mean(errors), np.mean(steps)))

    paretos = np.array(paretos)
    np.save('pareto_const.npy', paretos)
def from_dict(d):
    scaler = StandardScaler(copy=d['copy'], with_mean=d['with_mean'], with_std=d['with_std'])
    scaler.mean_ = np.array(d['mean_'])
    scaler.scale_ = np.array(d['scale_'])
    scaler.var_ = np.array(d['var_'])
    encoder = ContinuousEncoder(d['name'], d['bos'], d['eos'], scaler=scaler)
    return encoder
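# A plausible inverse of from_dict. The scaler keys mirror exactly what from_dict
# reads; the encoder attribute names (name/bos/eos) are assumptions about
# ContinuousEncoder, not confirmed by the source.
def to_dict(encoder):
    s = encoder.scaler
    return {
        'name': encoder.name, 'bos': encoder.bos, 'eos': encoder.eos,
        'copy': s.copy, 'with_mean': s.with_mean, 'with_std': s.with_std,
        'mean_': s.mean_.tolist(), 'scale_': s.scale_.tolist(), 'var_': s.var_.tolist(),
    }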
def main(args):
    # this will be removed soon
    if 'mutau' in args.input_dir or 'mt20' in args.input_dir:
        tree_prefix = 'mt_tree'
    elif 'etau' in args.input_dir or 'et20' in args.input_dir:
        tree_prefix = 'et_tree'
    else:
        raise Exception(
            'Input files must have MUTAU or ETAU in the provided path. You gave {}, ya goober.'
            .format(args.input_dir))

    # get scaler setup
    scaler = StandardScaler()
    scaler_info = pd.HDFStore(args.input_name)['scaler']
    scaler_info = scaler_info.drop('isSM', axis=0)
    scaler.mean_ = scaler_info['mean'].values.reshape(1, -1)
    scaler.scale_ = scaler_info['scale'].values.reshape(1, -1)
    scaler_columns = scaler_info.index.values

    # create output directory
    if not path.isdir('Output/trees/{}'.format(args.output_dir)):
        mkdir('Output/trees/{}'.format(args.output_dir))

    filelist = build_filelist(args.input_dir)
    print 'Files to process...'
    pprint(dict(filelist))

    nsyst = len(filelist.keys())
    i = 0
    for syst, ifiles in filelist.iteritems():
        # create output sub-directory (needed for systematics/nominal)
        out_path = 'Output/trees/{}/{}'.format(args.output_dir, syst)
        if not path.exists(out_path):
            mkdir(out_path)

        n_processes = min(12, multiprocessing.cpu_count() / 2)
        pool = multiprocessing.Pool(processes=n_processes)
        jobs = [
            pool.apply_async(
                classify,
                (ifile, tree_prefix, scaler, scaler_columns, args.model,
                 '{}/{}'.format(args.output_dir, syst))) for ifile in ifiles
        ]
        [j.get() for j in jobs]
        pool.close()
        pool.join()
        i += 1
        print 'All files written for {} ({} of {})'.format(syst, i, nsyst)

    if args.move is not None:
        system('mkdir -p {}'.format(args.move))
        subprocess.Popen(['nohup', 'mv', out_path, '{}/'.format(args.move)])
        print 'Moved files from {} to {}'.format(out_path, args.move)
def normalize(self, means=None, stds=None):
    """
    Normalize dataset either from its own statistical properties or from
    external ones. In the second case, both means and stds must be provided.
    """
    scaler = StandardScaler()
    assert (means is None) == (stds is None)
    if means is not None and stds is not None:
        scaler.mean_ = np.array(means)
        scaler.scale_ = np.array(stds)  # scale_ is the attribute transform() uses
    else:
        scaler.fit(self.data)
    self.data = scaler.transform(self.data, copy=False)
    return scaler.mean_.tolist(), scaler.scale_.tolist()
def __init__(self, upstream_dim, upstream_rate, downstream_expert, expdir, **kwargs):
    super(DownstreamExpert, self).__init__()

    # basic settings
    self.expdir = expdir
    self.upstream_dim = upstream_dim
    self.trgspk = downstream_expert['trgspk']
    self.datarc = downstream_expert['datarc']
    self.modelrc = downstream_expert['modelrc']
    self.acoustic_feature_dim = self.datarc["fbank_config"]["n_mels"]
    self.fs = self.datarc["fbank_config"]["fs"]
    self.resample_ratio = self.fs / self.datarc["fbank_config"]["n_shift"] * upstream_rate / FS
    print('[Downstream] - resample_ratio: ' + str(self.resample_ratio))

    # load datasets
    self.train_dataset = VCC2020Dataset('train', self.trgspk, **self.datarc)
    self.dev_dataset = VCC2020Dataset('dev', self.trgspk, **self.datarc)
    self.test_dataset = VCC2020Dataset('test', self.trgspk, **self.datarc)

    # load statistics file if it exists, and calculate it if not found
    scaler = StandardScaler()
    stats_root = self.datarc["stats_root"]
    if not os.path.exists(stats_root):
        os.makedirs(stats_root)
    stats_path = os.path.join(stats_root, self.trgspk + ".h5")
    if os.path.exists(stats_path):
        print("[Stats] - reading stats from " + str(stats_path))
        scaler.mean_ = read_hdf5(stats_path, "mean")
        scaler.scale_ = read_hdf5(stats_path, "scale")
    else:
        print("[Stats] - " + str(stats_path) + " does not exist. Calculating statistics...")
        for _, _, lmspc, _ in self.train_dataset:
            scaler.partial_fit(lmspc)
        write_hdf5(stats_path, "mean", scaler.mean_.astype(np.float32))
        write_hdf5(stats_path, "scale", scaler.scale_.astype(np.float32))
        print("[Stats] - writing stats to " + str(stats_path))
    self.stats = scaler

    # define model and loss
    self.model = Model(input_dim=self.upstream_dim,
                       output_dim=self.acoustic_feature_dim,
                       resample_ratio=self.resample_ratio,
                       stats=self.stats,
                       **self.modelrc)
    self.objective = Loss(self.stats)
def main():
    scaler = np.load("scaler.npz")
    X_scaler = StandardScaler()
    X_scaler.mean_, X_scaler.scale_ = scaler["mean"], scaler["scale"]
    with open("svm.pkl", mode="rb") as f:
        svc = pickle.load(f)

    cap = cv2.VideoCapture("project_video.mp4")
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output.avi', fourcc, 20.0, (1280, 720))

    heatmap = None
    ex_bbox_list = []
    exex_bbox_list = []
    exexex_bbox_list = []
    exexexex_bbox_list = []
    index = 0
    while True:
        ret, image = cap.read()
        if not ret:  # stop at the end of the video
            break
        index += 1
        heatmap = np.zeros_like(image)
        print("index", index)
        converted_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        windows = slide_window(converted_image, x_start_stop=[None, None],
                               y_start_stop=[400, image.shape[0] - 200],
                               xy_window=(240, 160), xy_overlap=(0.9, 0.9))
        bboxes = search_windows(converted_image, windows, svc, X_scaler,
                                hog_color="YCrCb", hog_channel="ALL",
                                spatial_size=(16, 16), spatial_color="YCrCb",
                                hist_color="YCrCb", spatial_feat=True,
                                orient=18, cell_per_block=2)
        if bboxes:
            heatmap = add_heat(heatmap, np.array(bboxes)[:, 0], value=2)
        if ex_bbox_list:
            heatmap = add_heat(heatmap, ex_bbox_list, value=2)
        if exex_bbox_list:
            heatmap = add_heat(heatmap, exex_bbox_list, value=2)
        if exexex_bbox_list:
            heatmap = add_heat(heatmap, exexex_bbox_list, value=1)
        if exexexex_bbox_list:
            heatmap = add_heat(heatmap, exexexex_bbox_list, value=1)
        heatmap = apply_threshold(heatmap, threshold=6)
        labels = label(heatmap)
        exexexex_bbox_list = exexex_bbox_list
        exexex_bbox_list = exex_bbox_list
        exex_bbox_list = ex_bbox_list
        window_img, ex_bbox_list = draw_labeled_bboxes(image, labels)
        out.write(window_img)
        # window_img = draw_boxes(image, bboxes, color=(0, 0, 255), thick=6)
        # cv2.imwrite("./images13/detected" + str(index) + ".jpg", window_img)

    out.release()
    cap.release()
def load_swag(path):
    save_items = torch.load(path)
    swag_model = (SWAGModel(save_items['hparams'])
                  .init_params(save_items['swa_params']))
    swag_model.w_avg = save_items['w_avg']
    swag_model.w2_avg = save_items['w2_avg']
    swag_model.pre_D = save_items['pre_D']

    if 'v50' in path:
        # Assume fixed scale:
        ssX = StandardScaler()
        ssX.scale_ = np.array([
            2.88976974e+03, 6.10019661e-02, 4.03849732e-02, 4.81638693e+01, 6.72583662e-02,
            4.17939679e-02, 8.15995339e+00, 2.26871589e+01, 4.73612029e-03, 7.09223721e-02,
            3.06455099e-02, 7.10726478e-01, 7.03392022e-01, 7.07873597e-01, 7.06030923e-01,
            7.04728204e-01, 7.09420909e-01, 1.90740659e-01, 4.75502285e-02, 2.77188320e-02,
            7.08891412e-01, 7.05214134e-01, 7.09786887e-01, 7.04371833e-01, 7.04371110e-01,
            7.09828420e-01, 3.33589977e-01, 5.20857790e-02, 2.84763136e-02, 7.02210626e-01,
            7.11815232e-01, 7.10512240e-01, 7.03646004e-01, 7.08017286e-01, 7.06162814e-01,
            2.12569430e-05, 2.35019125e-05, 2.04211110e-05, 7.51048890e-02, 3.94254400e-01,
            7.11351099e-02
        ])
        ssX.mean_ = np.array([
            4.95458585e+03, 5.67411891e-02, 3.83176945e-02, 2.97223474e+00, 6.29733979e-02,
            3.50074471e-02, 6.72845676e-01, 9.92794768e+00, 9.99628430e-01, 5.39591547e-02,
            2.92795061e-02, 2.12480714e-03, -1.01500319e-02, 1.82667162e-02, 1.00813201e-02,
            5.74404197e-03, 6.86570242e-03, 1.25316320e+00, 4.76946516e-02, 2.71326280e-02,
            7.02054326e-03, 9.83378673e-03, -5.70616748e-03, 5.50782881e-03, -8.44213953e-04,
            2.05958338e-03, 1.57866569e+00, 4.31476211e-02, 2.73316392e-02, 1.05505555e-02,
            1.03922250e-02, 7.36865006e-03, -6.00523246e-04, 6.53016990e-03, -1.72038113e-03,
            1.24807860e-05, 1.60314173e-05, 1.21732696e-05, 5.67292645e-03, 1.92488263e-01,
            5.08607199e-03
        ])
        ssX.var_ = ssX.scale_**2
        swag_model.ssX = ssX
    else:
        ssX_file = path[:-4] + '_ssX.pkl'
        try:
            ssX = pkl.load(open(ssX_file, 'rb'))
            swag_model.ssX = ssX
        except FileNotFoundError:
            print(f"ssX file not found! {ssX_file}")
            ...

    return swag_model
def __init__(self, cuda=False,
             filebase='long_zero_megno_with_angles_power_v14_*_output.pkl'):
    super(FeatureRegressor, self).__init__()
    pwd = os.path.dirname(__file__)
    self.cuda = cuda

    # Load model
    import pickle as pkl
    self.swag_ensemble = [
        spock_reg_model.load_swag(fname).cpu()
        for i, fname in enumerate(glob.glob(pwd + '/../' + filebase))  # 0.78, 0.970
    ]

    # Assume fixed scale:
    ssX = StandardScaler()
    ssX.scale_ = np.array([
        2.88976974e+03, 6.10019661e-02, 4.03849732e-02, 4.81638693e+01, 6.72583662e-02,
        4.17939679e-02, 8.15995339e+00, 2.26871589e+01, 4.73612029e-03, 7.09223721e-02,
        3.06455099e-02, 7.10726478e-01, 7.03392022e-01, 7.07873597e-01, 7.06030923e-01,
        7.04728204e-01, 7.09420909e-01, 1.90740659e-01, 4.75502285e-02, 2.77188320e-02,
        7.08891412e-01, 7.05214134e-01, 7.09786887e-01, 7.04371833e-01, 7.04371110e-01,
        7.09828420e-01, 3.33589977e-01, 5.20857790e-02, 2.84763136e-02, 7.02210626e-01,
        7.11815232e-01, 7.10512240e-01, 7.03646004e-01, 7.08017286e-01, 7.06162814e-01,
        2.12569430e-05, 2.35019125e-05, 2.04211110e-05, 7.51048890e-02, 3.94254400e-01,
        7.11351099e-02
    ])
    ssX.mean_ = np.array([
        4.95458585e+03, 5.67411891e-02, 3.83176945e-02, 2.97223474e+00, 6.29733979e-02,
        3.50074471e-02, 6.72845676e-01, 9.92794768e+00, 9.99628430e-01, 5.39591547e-02,
        2.92795061e-02, 2.12480714e-03, -1.01500319e-02, 1.82667162e-02, 1.00813201e-02,
        5.74404197e-03, 6.86570242e-03, 1.25316320e+00, 4.76946516e-02, 2.71326280e-02,
        7.02054326e-03, 9.83378673e-03, -5.70616748e-03, 5.50782881e-03, -8.44213953e-04,
        2.05958338e-03, 1.57866569e+00, 4.31476211e-02, 2.73316392e-02, 1.05505555e-02,
        1.03922250e-02, 7.36865006e-03, -6.00523246e-04, 6.53016990e-03, -1.72038113e-03,
        1.24807860e-05, 1.60314173e-05, 1.21732696e-05, 5.67292645e-03, 1.92488263e-01,
        5.08607199e-03
    ])
    ssX.var_ = ssX.scale_**2
    self.ssX = ssX
def __init__(self, stats_path, dataset_config):
    """Init GL params.

    Args:
        stats_path (str): path to the `stats.npy` file containing norm statistics.
        dataset_config (Dict): dataset configuration parameters.
    """
    super().__init__()
    scaler = StandardScaler()
    scaler.mean_, scaler.scale_ = np.load(stats_path)
    self.scaler = scaler
    self.ds_config = dataset_config
    self.mel_basis = librosa.filters.mel(
        self.ds_config["sampling_rate"],
        n_fft=self.ds_config["fft_size"],
        n_mels=self.ds_config["num_mels"],
        fmin=self.ds_config["fmin"],
        fmax=self.ds_config["fmax"],
    )  # [num_mels, fft_size // 2 + 1]
def getStandardDataTest(self):
    """ Split data to train and test """
    # from training original dataset
    mean_list = [25.03, 10.95, 29.06, 14.07, 413.04, 14.82, 25.02, 10.93]
    std_list = [8.70, 6.37, 9.55, 4.71, 179.02, 1.69, 8.71, 6.39]

    # we have the input data as x, and the output as y
    self.x_test = np.array(self.dfData.iloc[:, :-1])
    self.y_test = np.array(self.dfData.iloc[:, -1])

    # standardization
    scaler = StandardScaler()
    scaler.mean_ = mean_list
    scaler.scale_ = std_list

    # x_train and x_test
    self.x_test = np.array(scaler.transform(self.x_test))

    return self.x_test, self.y_test
def getStandardDataTest(self):
    """ Split data to train and test """
    # from training original dataset
    mean_list = [23.26, 9.33, 29.02, 13.93, 370.37, 14.21, 9.33]
    std_list = [8.52, 6.22, 9.42, 4.71, 177.41, 2.15, 6.23]

    # we have the input data as x, and the output as y
    self.x_test = np.array(self.dfData.iloc[:, :-1])
    self.y_test = np.array(self.dfData.iloc[:, -1])

    # standardization
    scaler = StandardScaler()
    scaler.mean_ = mean_list
    scaler.scale_ = std_list

    # x_train and x_test
    self.x_test = np.array(scaler.transform(self.x_test))

    return self.x_test, self.y_test
def getStandardDataTest(self):
    """ Split data to train and test """
    # from training original dataset
    mean_list = [23.02, 12.12, 28.97, 394.57, 14.30, 23.02, 12.11]
    std_list = [8.15, 5.45, 9.59, 163.09, 2.01, 8.15, 5.46]

    # we have the input data as x, and the output as y
    self.x_test = np.array(self.dfData.iloc[:, :-1])
    self.y_test = np.array(self.dfData.iloc[:, -1])

    # standardization
    scaler = StandardScaler()
    scaler.mean_ = mean_list
    scaler.scale_ = std_list

    # x_train and x_test
    self.x_test = np.array(scaler.transform(self.x_test))

    return self.x_test, self.y_test
def getStandardDataTest(self):
    """ Split data to train and test """
    # from training original dataset
    mean_list = [23.21, 9.84, 29.28, 13.37, 364.57, 13.39, 23.21]
    std_list = [7.34, 6.17, 9.40, 3.87, 162.81, 1.89, 7.33]

    # we have the input data as x, and the output as y
    self.x_test = np.array(self.dfData.iloc[:, :-1])
    self.y_test = np.array(self.dfData.iloc[:, -1])

    # standardization
    scaler = StandardScaler()
    scaler.mean_ = mean_list
    scaler.scale_ = std_list

    # x_train and x_test
    self.x_test = np.array(scaler.transform(self.x_test))

    return self.x_test, self.y_test
def getStandardDataTest(self):
    """ Split data to train and test """
    # from training original dataset
    mean_list = [23.84, 12.54, 29.19, 11.30, 418.23, 13.37, 23.84, 12.53]
    std_list = [6.40, 5.55, 9.39, 3.62, 139.44, 1.96, 6.39, 5.56]

    # we have the input data as x, and the output as y
    self.x_test = np.array(self.dfData.iloc[:, :-1])
    self.y_test = np.array(self.dfData.iloc[:, -1])

    # standardization
    scaler = StandardScaler()
    scaler.mean_ = mean_list
    scaler.scale_ = std_list

    # x_train and x_test
    self.x_test = np.array(scaler.transform(self.x_test))

    return self.x_test, self.y_test
def getStandardDataTest(self):
    """ Split data to train and test """
    # from training original dataset
    mean_list = [24.42, 11.03, 28.78, 13.38, 402.95, 15.20, 24.42, 11.02]
    std_list = [8.54, 6.26, 9.64, 4.578, 177.87, 1.78, 8.53, 6.27]

    # we have the input data as x, and the output as y
    self.x_test = np.array(self.dfData.iloc[:, :-1])
    self.y_test = np.array(self.dfData.iloc[:, -1])

    # standardization
    scaler = StandardScaler()
    scaler.mean_ = mean_list
    scaler.scale_ = std_list

    # x_train and x_test
    self.x_test = np.array(scaler.transform(self.x_test))

    return self.x_test, self.y_test
def getStandardDataTest(self):
    """ Split data to train and test """
    # from training original dataset
    mean_list = [20.30, 6.67, 30.38, 13.62, 209.17, 14.49, 20.14]
    std_list = [7.86, 8.31, 9.04, 4.93, 1033.36, 2.25, 8.03]

    # we have the input data as x, and the output as y
    self.x_test = np.array(self.dfData.iloc[:, :-1])
    self.y_test = np.array(self.dfData.iloc[:, -1])

    # standardization
    scaler = StandardScaler()
    scaler.mean_ = mean_list
    scaler.scale_ = std_list

    # x_train and x_test
    self.x_test = np.array(scaler.transform(self.x_test))

    return self.x_test, self.y_test
def griffin_lim_lb(mel_spec, stats_path, dataset_config, n_iter=32,
                   output_dir=None, wav_name="lb"):
    """Generate wave from mel spectrogram with Griffin-Lim algorithm using Librosa.

    Args:
        mel_spec (ndarray): array representing the mel spectrogram.
        stats_path (str): path to the `stats.npy` file containing norm statistics.
        dataset_config (Dict): dataset configuration parameters.
        n_iter (int): number of iterations for GL.
        output_dir (str): output directory where audio file will be saved.
        wav_name (str): name of the output file.

    Returns:
        gl_lb (ndarray): generated wave.
    """
    scaler = StandardScaler()
    scaler.mean_, scaler.scale_ = np.load(stats_path)
    mel_spec = np.power(10.0, scaler.inverse_transform(mel_spec)).T
    mel_basis = librosa.filters.mel(
        dataset_config["sampling_rate"],
        n_fft=dataset_config["fft_size"],
        n_mels=dataset_config["num_mels"],
        fmin=dataset_config["fmin"],
        fmax=dataset_config["fmax"],
    )
    mel_to_linear = np.maximum(1e-10, np.dot(np.linalg.pinv(mel_basis), mel_spec))
    gl_lb = librosa.griffinlim(
        mel_to_linear,
        n_iter=n_iter,
        hop_length=dataset_config["hop_size"],
        win_length=dataset_config["win_length"] or dataset_config["fft_size"],
    )
    if output_dir:
        output_path = os.path.join(output_dir, f"{wav_name}.wav")
        sf.write(output_path, gl_lb, dataset_config["sampling_rate"], "PCM_16")
    return gl_lb
from sklearn.cross_validation import train_test_split

X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(
    X, y, weights, test_size=0.25, random_state=0)

print("train data shape: %r, train target shape: %r, train weights shape: %r"
      % (X_train.shape, y_train.shape, w_train.shape))
print("test data shape: %r, test target shape: %r, test weights shape: %r"
      % (X_test.shape, y_test.shape, w_test.shape))

scaler = StandardScaler()
means = np.mean(X_train)
std = np.std(X_train)
print means[0]
scaler.mean_ = np.zeros(len(means))
scaler.std_ = np.ones(len(means))
for i in range(len(means)):
    scaler.mean_[i] = means[i]
    scaler.std_[i] = std[i]
print scaler.mean_
# scaler.mean_ =
# X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
print scaler.get_params(deep=True)
print scaler.mean_
print scaler.std_
sys.exit()

# Let's retrain a new model on the first subset, called the **training set**: