def train_model_for_phone(label_feat_pairs):
    """Fit a degree-2 polynomial regression from GOP values to labels.

    Args:
        label_feat_pairs: iterable of ``(label, gop)`` pairs for one phone.

    Returns:
        Tuple ``(coef_, intercept_)`` taken from the fitted
        ``LinearRegression`` model.
    """
    # Split the pairs into two parallel sequences in a single pass.
    scores, gop_values = zip(*label_feat_pairs)

    # Both the targets and the raw GOP values are shaped as column vectors.
    targets = np.reshape(np.array(scores), (-1, 1))
    gop_column = np.reshape(np.array(gop_values), (-1, 1))

    # Expand the single GOP column into degree-2 polynomial features.
    design = PolynomialFeatures(2).fit_transform(gop_column)

    # NOTE(review): balanced_sampling is defined elsewhere; presumably it
    # resamples so the label values are evenly represented -- confirm.
    design, targets = balanced_sampling(design, targets)

    regressor = LinearRegression()
    regressor.fit(design, targets)
    return regressor.coef_, regressor.intercept_
def train_model_for_phone(label_feat_pairs):
    """Fit a degree-2 polynomial regression from GOP values to labels.

    Args:
        label_feat_pairs: iterable of ``(label, gop)`` pairs for one phone.

    Returns:
        Tuple ``(coef_, intercept_)`` taken from the fitted
        ``LinearRegression`` model.
    """
    # Materialise once so the input is consumed in a single pass even if it
    # is a generator.
    samples = list(label_feat_pairs)

    # Targets and raw GOP values, each shaped as a column vector.
    targets = np.array([score for score, _ in samples]).reshape(-1, 1)
    gop_column = np.array([gop for _, gop in samples]).reshape(-1, 1)

    # Expand the single GOP column into degree-2 polynomial features.
    design = PolynomialFeatures(2).fit_transform(gop_column)

    # NOTE(review): balanced_sampling is defined elsewhere; presumably it
    # resamples so the label values are evenly represented -- confirm.
    design, targets = balanced_sampling(design, targets)

    regressor = LinearRegression()
    regressor.fit(design, targets)
    return regressor.coef_, regressor.intercept_
def train_model_for_phone(label_feat_pairs):
    """Fit a random-forest regressor for a single phone.

    Args:
        label_feat_pairs: iterable of ``(label, feat)`` pairs. The first
            element of every ``feat`` is dropped; the remaining elements are
            used as the model inputs.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    # Materialise once so the input is consumed in a single pass even if it
    # is a generator.
    samples = list(label_feat_pairs)

    target_list = [label for label, _ in samples]
    # Skip element 0 of each feature vector; only the tail is a model input.
    feature_rows = [feat[1:] for _, feat in samples]

    targets = np.array(target_list).reshape(-1, 1)
    # One row per sample, as wide as the first sample's feature tail.
    inputs = np.array(feature_rows)
    inputs = inputs.reshape(-1, len(feature_rows[0]))

    # NOTE(review): balanced_sampling is defined elsewhere; presumably it
    # resamples so the label values are evenly represented -- confirm.
    inputs, targets = balanced_sampling(inputs, targets)

    # Flatten the targets back to 1-D before fitting.
    targets = targets.ravel()

    forest = RandomForestRegressor()
    forest.fit(inputs, targets)
    return forest
def main():
    """Train one polynomial-regression model per phone and pickle them.

    Reads GOP values from ``args.gop_scp``, pairs each with its human score,
    groups the pairs by phone id, fits a degree-2 polynomial regression per
    phone, and writes a ``{phone_id: (coef_, intercept_)}`` dict to
    ``args.model`` with ``pickle``.
    """
    args = get_args()

    # Integer-id -> symbol table for phones (may be None, see check below).
    _, phone_int2sym = load_phone_symbol_table(args.phone_symbol_table)

    # Human scores and the phone symbol annotated for each phone key.
    score_of, phone_of = load_human_scores(args.human_scoring_json, floor=1)

    # Collect (score, gop) samples grouped by phone id.
    train_data_of = {}
    for key, segments in kaldi_io.read_post_scp(args.gop_scp):
        for i, segment in enumerate(segments):
            # Every segment must hold exactly one (phone, gop) pair.
            (ph, gop_value), = segment
            ph_key = f'{key}.{i}'

            if ph_key not in score_of:
                print(f'Warning: no human score for {ph_key}')
                continue

            # Only cross-check the phone symbol when a table is available.
            symbol_mismatch = (
                phone_int2sym is not None
                and phone_int2sym[ph] != phone_of[ph_key]
            )
            if symbol_mismatch:
                print(f'Unmatch: {phone_int2sym[ph]} <--> {phone_of[ph_key]} ')
                continue

            sample = (score_of[ph_key], gop_value)
            train_data_of.setdefault(ph, []).append(sample)

    # Fit a polynomial regression for every phone that has samples.
    expander = PolynomialFeatures(2)
    model_of = {}
    for ph, samples in train_data_of.items():
        # Each per-phone list holds at least one sample by construction.
        scores, gop_values = zip(*samples)
        targets = np.array(scores).reshape(-1, 1)
        gop_column = np.array(gop_values).reshape(-1, 1)

        design = expander.fit_transform(gop_column)
        # NOTE(review): balanced_sampling is defined elsewhere; presumably it
        # resamples so the label values are evenly represented -- confirm.
        design, targets = balanced_sampling(design, targets)

        regressor = LinearRegression()
        regressor.fit(design, targets)
        model_of[ph] = (regressor.coef_, regressor.intercept_)

    # Persist all per-phone parameters in one pickle file.
    with open(args.model, 'wb') as out_file:
        pickle.dump(model_of, out_file)
def main():
    """Train one random-forest regressor per phone and pickle them.

    Reads feature vectors from ``args.feature_scp``, pairs each with its
    human score, groups the pairs by phone id (element 0 of the feature
    vector), fits a ``RandomForestRegressor`` per phone on the remaining
    elements, and writes a ``{phone_id: model}`` dict to ``args.model`` with
    ``pickle``.
    """
    args = get_args()

    # Phone symbol table
    _, phone_int2sym = load_phone_symbol_table(args.phone_symbol_table)

    # Human expert scores
    score_of, phone_of = load_human_scores(args.human_scoring_json, floor=1)

    # Prepare training data
    train_data_of = {}
    for ph_key, feat in kaldi_io.read_vec_flt_scp(args.feature_scp):
        if ph_key not in score_of:
            print(f'Warning: no human score for {ph_key}')
            continue

        # The phone id is the grouping key, so it is needed whether or not a
        # symbol table is available. Reading it only inside the symbol-table
        # check left `ph` unbound when the table is None.
        ph = int(feat[0])

        # Only cross-check the phone symbol when a table is available.
        if (phone_int2sym is not None
                and phone_int2sym[ph] != phone_of[ph_key]):
            print(f'Unmatch: {phone_int2sym[ph]} <--> {phone_of[ph_key]} ')
            continue

        train_data_of.setdefault(ph, []).append((score_of[ph_key], feat))

    # Train models
    model_of = {}
    for ph, pairs in train_data_of.items():
        labels = np.array([label for label, _ in pairs]).reshape(-1, 1)
        # Element 0 is the phone id; only the tail is a model input.
        feat_rows = [feat[1:] for _, feat in pairs]
        feats = np.array(feat_rows).reshape(-1, len(feat_rows[0]))

        # NOTE(review): balanced_sampling is defined elsewhere; presumably it
        # resamples so the label values are evenly represented -- confirm.
        feats, labels = balanced_sampling(feats, labels)

        model = RandomForestRegressor()
        # Flatten the targets back to 1-D before fitting.
        model.fit(feats, labels.ravel())
        model_of[ph] = model
        print(f'Model of phone {ph} trained.')

    # Write to file
    with open(args.model, 'wb') as f:
        pickle.dump(model_of, f)