Ejemplo n.º 1
0
def train_model_for_phone(label_feat_pairs):
    """Fit a degree-2 polynomial regression for a single phone.

    Args:
        label_feat_pairs: Iterable of ``(label, gop)`` pairs.

    Returns:
        The ``(coef_, intercept_)`` tuple of the fitted
        ``LinearRegression`` model.
    """
    # Transpose the pairs into one sequence of labels and one of GOP values.
    label_column, gop_column = zip(*label_feat_pairs)
    targets = np.array(label_column).reshape(-1, 1)
    inputs = np.array(gop_column).reshape(-1, 1)

    # Expand every value into its degree-2 polynomial feature vector.
    inputs = PolynomialFeatures(2).fit_transform(inputs)

    # Resample through the balanced_sampling helper (defined elsewhere).
    inputs, targets = balanced_sampling(inputs, targets)

    regressor = LinearRegression()
    regressor.fit(inputs, targets)
    return regressor.coef_, regressor.intercept_
Ejemplo n.º 2
0
def train_model_for_phone(label_feat_pairs):
    """Fit a degree-2 polynomial regression for a single phone.

    Args:
        label_feat_pairs: Iterable of ``(label, gop)`` pairs.

    Returns:
        The ``(coef_, intercept_)`` tuple of the fitted
        ``LinearRegression`` model.
    """
    # Materialize once so the input can be walked twice even when it is a
    # one-shot iterator.
    pairs = list(label_feat_pairs)
    targets = np.array([label for label, _ in pairs]).reshape(-1, 1)
    inputs = np.array([gop for _, gop in pairs]).reshape(-1, 1)

    # Expand every value into its degree-2 polynomial feature vector.
    inputs = PolynomialFeatures(2).fit_transform(inputs)

    # Resample through the balanced_sampling helper (defined elsewhere).
    inputs, targets = balanced_sampling(inputs, targets)

    regressor = LinearRegression()
    regressor.fit(inputs, targets)
    return regressor.coef_, regressor.intercept_
def train_model_for_phone(label_feat_pairs):
    """Fit a random-forest regressor for a single phone.

    Args:
        label_feat_pairs: Iterable of ``(label, feat)`` pairs. The first
            element of every ``feat`` is dropped before training
            (presumably the phone id -- confirm against the caller).

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    # Materialize once so the input can be walked twice even when it is a
    # one-shot iterator.
    pairs = list(label_feat_pairs)
    targets = np.array([label for label, _ in pairs]).reshape(-1, 1)

    feat_rows = [feat[1:] for _, feat in pairs]
    # The width of the first row fixes the number of feature columns.
    n_columns = len(feat_rows[0])
    inputs = np.array(feat_rows).reshape(-1, n_columns)

    # Resample through the balanced_sampling helper (defined elsewhere).
    inputs, targets = balanced_sampling(inputs, targets)

    # The forest is fitted on a flat 1-D target vector.
    forest = RandomForestRegressor()
    forest.fit(inputs, targets.ravel())
    return forest
Ejemplo n.º 4
0
def main():
    """Train per-phone polynomial regressions on GOP values and pickle them.

    Reads the phone symbol table, the human scores and the GOP archive named
    by the command-line arguments, groups ``(score, gop)`` samples by phone,
    fits one degree-2 polynomial regression per phone and writes a dict
    mapping phone -> ``(coef_, intercept_)`` to ``args.model``.
    """
    args = get_args()

    # Only the second value returned for the phone symbol table is used.
    _, phone_int2sym = load_phone_symbol_table(args.phone_symbol_table)

    # Human expert scores and the phone annotated for each scored key.
    score_of, phone_of = load_human_scores(args.human_scoring_json, floor=1)

    # Collect (score, gop) samples grouped by phone.
    samples_by_phone = {}
    for utt_key, utt_gops in kaldi_io.read_post_scp(args.gop_scp):
        # Each entry is expected to hold exactly one (phone, gop) pair.
        for idx, [(ph, gop)] in enumerate(utt_gops):
            ph_key = f'{utt_key}.{idx}'

            if ph_key not in score_of:
                print(f'Warning: no human score for {ph_key}')
                continue

            # The consistency check is skipped when no symbol table exists.
            if phone_int2sym is not None:
                if phone_int2sym[ph] != phone_of[ph_key]:
                    print(f'Unmatch: {phone_int2sym[ph]} <--> {phone_of[ph_key]} ')
                    continue

            samples_by_phone.setdefault(ph, []).append((score_of[ph_key], gop))

    # Fit one degree-2 polynomial regression per phone.
    expander = PolynomialFeatures(2)
    model_of = {}
    for ph, samples in samples_by_phone.items():
        targets = np.array([label for label, _ in samples]).reshape(-1, 1)
        inputs = np.array([gop for _, gop in samples]).reshape(-1, 1)
        inputs = expander.fit_transform(inputs)

        # Resample through the balanced_sampling helper (defined elsewhere).
        inputs, targets = balanced_sampling(inputs, targets)

        regressor = LinearRegression()
        regressor.fit(inputs, targets)
        model_of[ph] = (regressor.coef_, regressor.intercept_)

    # Persist the per-phone coefficients.
    with open(args.model, 'wb') as out_file:
        pickle.dump(model_of, out_file)
Ejemplo n.º 5
0
def main():
    """Train one random-forest regressor per phone and pickle the models.

    Reads the phone symbol table, the human scores and the feature archive
    named by the command-line arguments, groups ``(score, feat)`` samples by
    the phone id stored in the first element of each feature vector, fits a
    ``RandomForestRegressor`` per phone on the remaining elements and writes
    a dict mapping phone id -> fitted model to ``args.model``.
    """
    args = get_args()

    # Phone symbol table (only the second returned value is used)
    _, phone_int2sym = load_phone_symbol_table(args.phone_symbol_table)

    # Human expert scores
    score_of, phone_of = load_human_scores(args.human_scoring_json, floor=1)

    # Prepare training data
    train_data_of = {}
    for ph_key, feat in kaldi_io.read_vec_flt_scp(args.feature_scp):
        if ph_key not in score_of:
            print(f'Warning: no human score for {ph_key}')
            continue

        # The first element of the feature vector carries the phone id.
        # Read it unconditionally: it is the grouping key below, so it must
        # be defined even when no symbol table is available.
        ph = int(feat[0])

        # The consistency check only makes sense with a symbol table.
        if phone_int2sym is not None and phone_int2sym[ph] != phone_of[ph_key]:
            print(f'Unmatch: {phone_int2sym[ph]} <--> {phone_of[ph_key]} ')
            continue

        train_data_of.setdefault(ph, []).append((score_of[ph_key], feat))

    # Train models
    model_of = {}
    for ph, pairs in train_data_of.items():
        labels = np.array([label for label, _ in pairs]).reshape(-1, 1)

        # Drop the leading phone id; the rest of the vector is the input.
        feats = [feat[1:] for _, feat in pairs]
        feats = np.array(feats).reshape(-1, len(feats[0]))

        # Resample through the balanced_sampling helper (defined elsewhere).
        feats, labels = balanced_sampling(feats, labels)

        # The forest is fitted on a flat 1-D target vector.
        model = RandomForestRegressor()
        model.fit(feats, labels.ravel())
        model_of[ph] = model
        print(f'Model of phone {ph} trained.')

    # Write to file
    with open(args.model, 'wb') as f:
        pickle.dump(model_of, f)