Example #1
import keras


def train(batch_size=8):
    # get_model() and data_generator() come from elsewhere in the project;
    # batch_size was undefined in the original snippet, so it is exposed
    # here as a parameter with an assumed default.
    model = get_model()

    train_datagen = data_generator(batch_size=batch_size)

    loss_names_stage1 = ["ILC_mse_loss", "ILC_rank_loss", "ILC_class_loss"]
    loss_names_stage2 = [
        "ILC_mse_loss", "ILC_rank_loss", "ILC_class_loss", "ILC_spatial_loss"
    ]
    regex_stage1 = r"(ILC_.*)"  # '_' needs no escaping in a regex
    regex_stage2 = ".*"
    log_dir = "./log"
    checkpoint_dir = "./checkpoints/weights.{epoch:03d}.hdf5"
    pretrained_dir = "./pretrained/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5"

    # Train stage 1: start from ImageNet weights and fit only the ILC_* layers.
    load_weights(pretrained_dir, model, by_name=True)
    set_trainable(model, regex_stage1, 1)
    # `compile` here is a project helper wrapping model.compile, not the builtin.
    compile(model, loss_names_stage1, lr=0.01, clipnorm=True)

    callbacks = [
        keras.callbacks.TensorBoard(log_dir=log_dir,
                                    histogram_freq=0,
                                    write_graph=True,
                                    write_images=False),
        keras.callbacks.ModelCheckpoint(checkpoint_dir,
                                        verbose=0,
                                        mode="min",
                                        save_weights_only=True)
    ]

    # The original passed workers=0 with use_multiprocessing=True; workers=0
    # runs the generator on the main thread, so that flag had no effect.
    model.fit_generator(train_datagen,
                        epochs=30,
                        steps_per_epoch=35,
                        callbacks=callbacks,
                        max_queue_size=100,
                        workers=1,
                        use_multiprocessing=False)
    # (loss_names_stage2 / regex_stage2 serve the stage-2 fit, which this
    # excerpt cuts off)
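
The data_generator used above is not shown. For fit_generator it must yield (inputs, targets) batches indefinitely; a minimal sketch under that assumption, with the arrays purely illustrative:

import numpy as np

def data_generator(batch_size):
    # Stand-in arrays; the real loader would read images and labels from disk.
    images = np.random.rand(350, 224, 224, 3).astype("float32")
    targets = np.random.rand(350, 1).astype("float32")
    while True:  # Keras generators must loop forever
        idx = np.random.choice(len(images), batch_size, replace=False)
        yield images[idx], targets[idx]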
Example #2
                   sep="\t\t",
                   names=['a', 'b', 'current', 'label'],
                   dtype=str,
                   engine='python')
df.dropna(how='any', inplace=True)
train_length = int(len(df) * 0.9)
train_df = df.iloc[:train_length]
valid_df = df.iloc[train_length:].copy()  # .copy() avoids SettingWithCopyWarning below
#train_df, valid_df = train_test_split(df, test_size=0.2, random_state=42)
valid_df['eval_label'] = valid_df['label'].apply(lambda x: ' '.join(list(x)))
# Load the datasets
train_data = generate_label(train_df, tokenizer)
valid_data = generate_label(valid_df, tokenizer, is_valid=True)

# Convert the datasets into batch generators
train_generator = data_generator(train_data, batch_size)
valid_generator = data_generator(valid_data, batch_size)

model = taggerRewriterModel(model_name='albert',
                            config_path=config_path,
                            checkpoint_path=checkpoint_path,
                            num_classes=num_classes,
                            learning_rate=lr)


class Evaluator(keras.callbacks.Callback):
    """metrics and save best model
    """
    def __init__(self):
        super(Evaluator, self).__init__()
        self.best_em = 0.
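
The Evaluator class is cut off after __init__. A hedged sketch of how such a callback usually continues in bert4keras-style projects (the evaluate helper and weight path are assumptions): compute exact match on the validation set each epoch and keep the best weights.

    def on_epoch_end(self, epoch, logs=None):
        em = evaluate(valid_generator)  # exact-match rate; helper assumed
        if em > self.best_em:
            self.best_em = em
            self.model.save_weights('best_model.weights')  # path assumed
        print('em: %.5f, best em: %.5f' % (em, self.best_em))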
Example #3
from tqdm import tqdm
from util import load_data, get_tokens, get_keep_tokens
from config import BaseConfig
from model import get_model
from dataset import data_generator

# Load the datasets
data = load_data(BaseConfig.train_path)
train_data = [d for i, d in enumerate(data) if i % 10 != 0]
valid_data = [d for i, d in enumerate(data) if i % 10 == 0]
test_data = load_data(BaseConfig.test_path)

test_generator = data_generator(test_data, BaseConfig.batch_size)
# Token frequencies over the dataset
tokens = get_tokens(data + test_data)

# BERT token frequencies
keep_tokens = get_keep_tokens()

model = get_model(tokens, keep_tokens)
model.load_weights('best_model.h5')

with open("result.csv", mode="w") as f:
    for x_true, _ in tqdm(test_generator):
        y_pred = model.predict(x_true)[:, 0, 5:7]  # scores of the two label token ids (5 and 6)
        y_pred = y_pred[:, 1] / (y_pred.sum(axis=1) + 1e-8)  # normalized positive probability
        for p in y_pred:
            f.write('%f\n' % p)
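
The data_generator class shared by this and the neighboring bert4keras-style snippets is not shown. A minimal sketch of the usual pattern, assuming (text_a, text_b, label) records and a global tokenizer: subclass bert4keras's DataGenerator and yield padded id batches.

import numpy as np
from bert4keras.snippets import DataGenerator, sequence_padding

class data_generator(DataGenerator):
    """Yields ([token_ids, segment_ids], labels) batches."""
    def __iter__(self, random=False):
        batch_token_ids, batch_segment_ids, batch_labels = [], [], []
        for is_end, (text_a, text_b, label) in self.sample(random):
            token_ids, segment_ids = tokenizer.encode(text_a, text_b, maxlen=128)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_labels.append([label])
            if len(batch_token_ids) == self.batch_size or is_end:
                yield [sequence_padding(batch_token_ids),
                       sequence_padding(batch_segment_ids)], np.array(batch_labels)
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []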
Example #4
# Note: model_hash, state_size, batch_size, num_steps and mode are defined
# earlier in the original script; the excerpt starts here.
num_layers = 2
dropout_keep = 1.0

dataset_path = './data/1984.txt'
model_name = '{}-{}l-{}s-{}bs'.format(model_hash, num_layers, state_size,
                                      batch_size)
checkpoint = './checkpoints/' + model_name + '.ckpt'
logs = './logs/' + model_name

if mode == 'generate':
    # sample one token at a time, with dropout disabled
    batch_size = 1
    dropout_keep = 1.0
    num_steps = 1

train_set = data_generator(dataset_path,
                           batch_size=batch_size,
                           num_steps=num_steps)

graph = build_graph(state_size=state_size,
                    nb_class=train_set.n_classes,
                    batch_size=batch_size,
                    num_steps=num_steps,
                    dropout_keep=dropout_keep,
                    num_layers=num_layers)

if mode == 'train':
    train_network(graph,
                  num_steps=num_steps,
                  batch_size=batch_size,
                  checkpoint=checkpoint,
                  logs=logs)
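
data_generator here is assumed to tokenize the text file and yield (input, target) id matrices of shape (batch_size, num_steps), exposing n_classes as used by build_graph. A minimal character-level sketch under those assumptions:

import numpy as np

class data_generator:
    """Sketch: character-level batches from a text file (assumed API)."""
    def __init__(self, path, batch_size, num_steps):
        with open(path) as f:
            text = f.read()
        chars = sorted(set(text))
        self.n_classes = len(chars)
        char_to_id = {c: i for i, c in enumerate(chars)}
        self.ids = np.array([char_to_id[c] for c in text])
        self.batch_size = batch_size
        self.num_steps = num_steps

    def __iter__(self):
        span = self.batch_size * self.num_steps
        for start in range(0, len(self.ids) - span - 1, span):
            x = self.ids[start:start + span]
            y = self.ids[start + 1:start + span + 1]  # targets are inputs shifted by one
            yield (x.reshape(self.batch_size, self.num_steps),
                   y.reshape(self.batch_size, self.num_steps))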
Example #5
    def get_dataloader(self):
        args = self.args
        # define transforms (read_hdf5, find_files and read_txt are project
        # utilities; StandardScaler comes from scikit-learn)
        scaler = StandardScaler()
        scaler.mean_ = read_hdf5(args.stats, "/" + args.feature_type + "/mean")
        scaler.scale_ = read_hdf5(args.stats,
                                  "/" + args.feature_type + "/scale")
        wav_transform = transforms.Compose(
            [lambda x: encode_mu_law(x, args.n_quantize)])
        feat_transform = transforms.Compose([lambda x: scaler.transform(x)])

        # define generator
        if os.path.isdir(args.waveforms):
            filenames = sorted(
                find_files(args.waveforms, "*.wav", use_dir_name=False))
            wav_list_train = [
                args.waveforms + "/" + filename for filename in filenames
            ]
            feat_list_train = [
                args.feats + "/" + filename.replace(".wav", ".h5")
                for filename in filenames
            ]

        elif os.path.isfile(args.waveforms):
            wav_list_train = read_txt(args.waveforms)
            feat_list_train = read_txt(args.feats)
        else:
            logging.error("--waveforms should be a directory or a file list.")
            sys.exit(1)
        assert len(wav_list_train) == len(feat_list_train)
        logging.info("number of training data = %d." % len(wav_list_train))
        generator = data_generator(
            wav_list_train,
            feat_list_train,
            receptive_field=self.model.receptive_field,
            batch_length=args.batch_length,
            batch_size=args.batch_size,
            feature_type=args.feature_type,
            wav_transform=wav_transform,
            feat_transform=feat_transform,
            shuffle=True,
            upsampling_factor=args.upsampling_factor,
            use_upsampling_layer=args.use_upsampling_layer,
            use_speaker_code=args.use_speaker_code,
            use_pulse=args.use_pulse)

        # wav_list_test / feat_list_test come from a part of the class
        # omitted in this excerpt (a held-out subset of the data)
        test_generator = data_generator(
            wav_list_test[:args.batch_size],
            feat_list_test[:args.batch_size],
            receptive_field=self.model.receptive_field,
            batch_length=args.batch_length,
            batch_size=args.batch_size,
            feature_type=args.feature_type,
            wav_transform=wav_transform,
            feat_transform=feat_transform,
            shuffle=False,
            upsampling_factor=args.upsampling_factor,
            use_upsampling_layer=args.use_upsampling_layer,
            use_speaker_code=args.use_speaker_code,
            use_pulse=args.use_pulse)

        # wait until the generator's background thread has filled its
        # minibatch queue before returning
        while not generator.queue.full():
            time.sleep(0.1)

        return generator, test_generator
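
The generators above prefetch minibatches on a background thread into a bounded queue, which is why get_dataloader blocks until the queue is full. A minimal sketch of that pattern, with all names assumed:

import queue
import threading

class PrefetchGenerator:
    """Run a batch-producing iterable on a background thread."""
    def __init__(self, make_batches, maxsize=10):
        self.queue = queue.Queue(maxsize=maxsize)

        def worker():
            for batch in make_batches():
                self.queue.put(batch)  # blocks while the queue is full

        threading.Thread(target=worker, daemon=True).start()

    def next(self):
        return self.queue.get()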
Example #6
# The excerpt starts mid-script: `data` and `test_data` are loaded above it
# (cf. the load_data calls in Example #3).
#     data.append((d[0], d[1], -5))
labels = [d[2] for d in data]

if __name__ == '__main__':
    skf = StratifiedKFold(5, shuffle=True, random_state=2021)
    for fold_id, (train_id,
                  valid_id) in enumerate(skf.split(range(len(data)), labels)):
        train_data = np.array(data)[train_id]
        valid_data = np.array(data)[valid_id]
        train_data = list(train_data)
        valid_data = list(valid_data)
        # append unlabeled test pairs with a sentinel label of -5
        for d in test_data:
            train_data.append((d[0], d[1], -5))
        train_generator = data_generator(train_data, BaseConfig.batch_size)
        valid_generator = data_generator(valid_data, BaseConfig.batch_size)
        K.clear_session()
        seed(SEED + fold_id)
        np.random.seed(SEED + fold_id)
        tf.random.set_random_seed(SEED + fold_id)
        # Load the pretrained model
        model = get_model(tokens, keep_tokens)
        adv_layer_names = ['Embedding-Token']
        adversarial_training(model, adv_layer_names, 0.5)
        evaluator = Evaluator(model, valid_generator, fold_id, True)
        warmup = Warmup(decay=2e-5, warmup_epochs=3)
        model.fit(train_generator.forfit(),
                  steps_per_epoch=len(train_generator),
                  epochs=80,
                  callbacks=[evaluator, warmup])
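
The Warmup callback used above is not shown. A minimal sketch, assuming a linear learning-rate warm-up over warmup_epochs followed by a fixed decayed rate (the base rate and schedule shape are assumptions):

import keras
import keras.backend as K

class Warmup(keras.callbacks.Callback):
    def __init__(self, decay=2e-5, warmup_epochs=3, base_lr=2e-5):
        super(Warmup, self).__init__()
        self.decay = decay
        self.warmup_epochs = warmup_epochs
        self.base_lr = base_lr

    def on_epoch_begin(self, epoch, logs=None):
        if epoch < self.warmup_epochs:
            lr = self.base_lr * (epoch + 1) / self.warmup_epochs  # ramp up
        else:
            lr = self.decay  # settle at the decayed rate
        K.set_value(self.model.optimizer.lr, lr)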
Example #7
import numpy as np
import matplotlib.pyplot as plt
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from dataset import data_generator
plt.style.use('ggplot')


## choose a uniform or a weighted mixture of Gaussians
dset = data_generator()
#dset.random_distribution()
dset.uniform_distribution()



def plot(points, title):
    # generated samples in blue, mixture centers in green
    plt.scatter(points[:, 0], points[:, 1], s=10, c='b', alpha=0.5)
    plt.scatter(dset.centers[:, 0],
                dset.centers[:, 1],
                s=100,
                c='g',
                alpha=0.5)
    plt.title(title)
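
The toy data_generator for this GAN-style example is not shown. A minimal sketch of the usual mixture-of-Gaussians toy dataset, with the ring layout and the sample() API both assumptions:

import numpy as np

class data_generator:
    def __init__(self, n_centers=8, radius=2.0, sigma=0.02):
        angles = np.linspace(0, 2 * np.pi, n_centers, endpoint=False)
        self.centers = radius * np.stack([np.cos(angles), np.sin(angles)], axis=1)
        self.sigma = sigma
        self.p = np.full(n_centers, 1.0 / n_centers)

    def uniform_distribution(self):
        # equal weight on every mixture component
        self.p = np.full(len(self.centers), 1.0 / len(self.centers))

    def random_distribution(self):
        # random component weights
        p = np.random.rand(len(self.centers))
        self.p = p / p.sum()

    def sample(self, batch_size):
        idx = np.random.choice(len(self.centers), batch_size, p=self.p)
        return self.centers[idx] + self.sigma * np.random.randn(batch_size, 2)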
Example #8
def main():

    ##########          Linear Regression with degree=1,5,10,14          ##########
    if not Path('./data/data.csv').is_file():
        data_generator('linear', './data/data.csv', 20)

    data_set = RegressionDataset('./data/data.csv')
    total_sz = len(data_set)
    test_sz = int(0.25 * total_sz)
    train_set, test_set = random_split(data_set, [total_sz - test_sz, test_sz])

    origin_train_set = train_set
    origin_test_set = test_set

    print("Regression Linear Leave One Out Degree 1 20 Data Points")
    weight_loo_deg1=loo_train(train_set,1)
    test(test_set,1,weight_loo_deg1)

    print("Regression Linear Five Fold Degree 1 20 Data Points")
    weight_kf_deg1=kf_train(train_set,1)
    test(test_set,1,weight_kf_deg1)

    print("Regression Linear Leave One Out Degree 5 20 Data Points")
    weight_loo_deg5=loo_train(train_set,5)
    test(test_set,5,weight_loo_deg5)

    print("Regression Linear Five Fold Degree 5 20 Data Points")
    weight_kf_deg5=kf_train(train_set,5)
    test(test_set,5,weight_kf_deg5)

    print("Regression Linear Leave One Out Degree 10 20 Data Points")
    weight_loo_deg10=loo_train(train_set,10)
    test(test_set,10,weight_loo_deg10)

    print("Regression Linear Five Fold Degree 10 20 Data Points")
    weight_kf_deg10=kf_train(train_set,10)
    test(test_set,10,weight_kf_deg10)

    print("Regression Linear Leave One Out Degree 14 20 Data Points")
    weight_loo_deg14=loo_train(train_set,14)
    test(test_set,14,weight_loo_deg14)

    print("Regression Linear Five Fold Degree 14 20 Data Points")
    weight_kf_deg14=kf_train(train_set,14)
    test(test_set,14,weight_kf_deg14)

    draw_fitting_plot(train_set,[-3,3],weight_loo_deg1,1,['degree=1','degree=5','degree=10','degree=14'],'Linear Data Leave One Out Curve','./figure/linear-loo.jpg',weight_loo_deg5,5,weight_loo_deg10,10,weight_loo_deg14,14)
    draw_fitting_plot(train_set,[-3,3],weight_kf_deg1,1,['degree=1','degree=5','degree=10','degree=14'],'Linear Data Five Folds Curve','./figure/linear-kf.jpg',weight_kf_deg5,5,weight_kf_deg10,10,weight_kf_deg14,14)

    ##########          Linear Regression on Sine Curve Data with degree=5,10,14          ##########
    if not Path('./data/sin_data.csv').is_file():
        data_generator('sin', './data/sin_data.csv', 20)

    data_set = RegressionDataset('./data/sin_data.csv')
    total_sz = len(data_set)
    test_sz = int(0.25 * total_sz)
    train_set, test_set = random_split(data_set, [total_sz - test_sz, test_sz])
    
    print('Regression Sine Leave One Out Degree 5 20 Data Points')
    weight_loo_deg5=loo_train(train_set,5)
    test(test_set,5,weight_loo_deg5)
    
    print('Regression Sine Five Fold Degree 5 20 Data Points')
    weight_kf_deg5=kf_train(train_set,5)
    test(test_set,5,weight_kf_deg5)

    print('Regression Sine Leave One Out Degree 10 20 Data Points')
    weight_loo_deg10=loo_train(train_set,10)
    test(test_set,10,weight_loo_deg10)
    
    print('Regression Sine Five Fold Degree 10 20 Data Points')
    weight_kf_deg10=kf_train(train_set,10)
    test(test_set,10,weight_kf_deg10)

    print('Regression Sine Leave One Out Degree 14 20 Data Points')
    weight_loo_deg14=loo_train(train_set,14)
    test(test_set,14,weight_loo_deg14)
    
    print('Regression Sine Five Fold Degree 14 20 Data Points')
    weight_kf_deg14=kf_train(train_set,14)
    test(test_set,14,weight_kf_deg14)

    draw_fitting_plot(train_set,[0,1],weight_loo_deg5,5,['degree=5','degree=10','degree=14'],'Sine Data Leave One Out Curve','./figure/sine-loo.jpg',weight_loo_deg10,10,weight_loo_deg14,14)
    draw_fitting_plot(train_set,[0,1],weight_kf_deg5,5,['degree=5','degree=10','degree=14'],'Sine Data Five Folds Curve','./figure/sine-kf.jpg',weight_kf_deg10,10,weight_kf_deg14,14)

    ##########          Linear Regression on Different Training Data Size          ##########  
    if not Path('./data/data_320.csv').is_file():
        data_generator('linear', './data/data_320.csv', 320)

    data_set = RegressionDataset('./data/data_320.csv')
    total_sz = len(data_set)
    delete_sz = 260
    use_set, delete_set = random_split(data_set, [total_sz - delete_sz, delete_sz])

    total_sz = len(use_set)
    test_sz = int(0.25 * total_sz)
    train_set, test_set = random_split(use_set, [total_sz - test_sz, test_sz])

    print('Regression Linear Leave One Out Degree 14 60 Data Points')
    weight_loo_data60=loo_train(train_set,14)
    test(test_set,14,weight_loo_data60)

    print('Regression Linear Five Fold Degree 14 60 Data Points')
    weight_kf_data60=kf_train(train_set,14)
    test(test_set,14,weight_kf_data60)

    total_sz = len(data_set)
    delete_sz = 160
    use_set, delete_set = random_split(data_set, [total_sz - delete_sz, delete_sz])

    total_sz = len(use_set)
    test_sz = int(0.25 * total_sz)
    train_set, test_set = random_split(use_set, [total_sz - test_sz, test_sz])

    print('Regression Linear Leave One Out Degree 14 160 Data Points')
    weight_loo_data160=loo_train(train_set,14)
    test(test_set,14,weight_loo_data160)

    print('Regression Linear Five Fold Degree 14 160 Data Points')
    weight_kf_data160=kf_train(train_set,14)
    test(test_set,14,weight_kf_data160)

    total_sz = len(data_set)
    delete_sz = 0
    use_set, delete_set = random_split(data_set, [total_sz - delete_sz, delete_sz])

    total_sz = len(use_set)
    test_sz = int(0.25 * total_sz)
    train_set, test_set = random_split(use_set, [total_sz - test_sz, test_sz])

    print('Regression Linear Leave One Out Degree 14 320 Data Points')
    weight_loo_data320=loo_train(train_set,14)
    test(test_set,14,weight_loo_data320)

    print('Regression Linear Five Fold Degree 14 320 Data Points')
    weight_kf_data320=kf_train(train_set,14)
    test(test_set,14,weight_kf_data320)

    draw_fitting_plot(origin_train_set,[-3,3],weight_loo_data60,14,['m=60','m=160','m=320'],'Linear Data Different m Leave One Out Curve','./figure/data-m-loo.jpg',weight_loo_data160,14,weight_loo_data320,14)
    draw_fitting_plot(origin_train_set,[-3,3],weight_kf_data60,14,['m=60','m=160','m=320'],'Linear Data Different m Five Folds Curve','./figure/data-m-kf.jpg',weight_kf_data160,14,weight_kf_data320,14)

    ##########          Linear Regression with Regularization Term lambda          ##########
    print('Regularization 0.001/m Linear Five Fold Degree 14 20 Data Points')
    _lambda = 0.001 / 20
    weight_kf_0001l = kf_train(origin_train_set, 14, _lambda)
    test(origin_test_set, 14, weight_kf_0001l)

    print('Regularization 1/m Linear Five Fold Degree 14 20 Data Points')
    _lambda = 1.0 / 20
    weight_kf_1l = kf_train(origin_train_set, 14, _lambda)
    test(origin_test_set, 14, weight_kf_1l)

    print('Regularization 1000/m Linear Five Fold Degree 14 20 Data Points')
    _lambda = 1000.0 / 20
    weight_kf_1000l = kf_train(origin_train_set, 14, _lambda)
    test(origin_test_set, 14, weight_kf_1000l)

    draw_fitting_plot(origin_train_set,[-3,3],weight_kf_0001l,14,['0.001/m','1/m','1000/m'],'Linear Data Five Fold with Regularization Curve','./figure/regularization-kf.jpg',weight_kf_1l,14,weight_kf_1000l,14)
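
loo_train and kf_train are not shown. A minimal sketch of what the five-fold variant might compute, assuming plain arrays rather than the Dataset objects above: a closed-form ridge fit on polynomial features, averaged over the folds (the averaging is an assumption).

import numpy as np

def poly_features(x, degree):
    # Vandermonde-style expansion: [1, x, x^2, ..., x^degree]
    return np.stack([x ** d for d in range(degree + 1)], axis=1)

def kf_train(xs, ys, degree, lam=0.0, n_folds=5):
    folds = np.array_split(np.random.permutation(len(xs)), n_folds)
    weights = []
    for k in range(n_folds):
        train_idx = np.concatenate([f for i, f in enumerate(folds) if i != k])
        X = poly_features(xs[train_idx], degree)
        y = ys[train_idx]
        # ridge normal equations: (X^T X + lam * I) w = X^T y
        w = np.linalg.solve(X.T @ X + lam * np.eye(X.shape[1]), X.T @ y)
        weights.append(w)
    return np.mean(weights, axis=0)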
Example #9
class DisplayCallback(tf.keras.callbacks.Callback):
    """Visualize a sample prediction at the end of every epoch."""
    def on_epoch_end(self, epoch, logs=None):
        # clear_output comes from IPython.display; show_predictions is
        # defined elsewhere in the training script
        clear_output(wait=True)
        show_predictions()
        print('\nSample Prediction after epoch {}\n'.format(epoch + 1))


if __name__ == "__main__":
    # Prepare data for training
    train_ds = tf.data.Dataset.list_files(
        os.path.join(data_dir, "train-*.tfrecord"))
    val_ds = tf.data.Dataset.list_files(
        os.path.join(data_dir, "val-*.tfrecord"))

    train_batches = data_generator(
        train_ds, is_training=True, img_height=img_height, img_width=img_width, batch_size=batch_size)
    val_batches = data_generator(
        val_ds, is_training=False, img_height=img_height, img_width=img_width, batch_size=batch_size)

    # Visualize sample
    for image, mask in train_batches.take(1):
        sample_image, sample_mask = image[0], mask[0]
    display([sample_image, sample_mask])

    # Prepare model for training
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    strategy = tf.distribute.MirroredStrategy()

    with strategy.scope():
        model = deeplabv3(img_height, img_width, n_classes)
        # (the excerpt ends here; the model is presumably compiled with
        # `loss` and fit below)
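
The data_generator for this segmentation example is not shown. A minimal tf.data sketch under assumed TFRecord contents (feature keys 'image'/'mask', JPEG images, PNG masks are all assumptions):

import tensorflow as tf

def data_generator(file_ds, is_training, img_height, img_width, batch_size):
    def parse(example):
        feats = tf.io.parse_single_example(example, {
            'image': tf.io.FixedLenFeature([], tf.string),
            'mask': tf.io.FixedLenFeature([], tf.string),
        })
        image = tf.io.decode_jpeg(feats['image'], channels=3)
        mask = tf.io.decode_png(feats['mask'], channels=1)
        image = tf.image.resize(image, [img_height, img_width]) / 255.0
        # nearest-neighbor resize keeps masks as integer class ids
        mask = tf.image.resize(mask, [img_height, img_width], method='nearest')
        return image, mask

    ds = file_ds.interleave(tf.data.TFRecordDataset,
                            num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.map(parse, num_parallel_calls=tf.data.AUTOTUNE)
    if is_training:
        ds = ds.shuffle(1000).repeat()
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)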
Example #10
# The excerpt starts mid-script: `series` and `labels` are built above it.
ts, ind, curr_label = [], 0, labels[0]  # assumed initialization, elided in the excerpt
for i, label in enumerate(labels):
    if label != curr_label:
        ts.append(series[ind:i])
        ind = i
        curr_label = label

data = []
for i, series in enumerate(ts):
    if len(series) >= 49:
        data.append(series[:49])

# shift every point up by one
for i, series in enumerate(data):
    for idx, point in enumerate(data[i]):
        data[i][idx] += 1

generator = data_generator(50, 2, config['output_size'])
data = []  # note: this overwrites the hand-built series above with generated ones
i = 0
for series in generator:
    i += 1
    data.append(series)
    if i == 3000:  # take 3000 generated series
        break
print('data generated!')

dataset = DatasetTS(data, config['output_size'])
dataloader = DataLoader(dataset, batch_size=config['batch_size'], shuffle=True)
model = ESRNN_model(num_series=len(dataset), configuration=config)
tr = TrainESRNN(model, dataloader, config)

print('Starting to run the model!')
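
The data_generator(50, 2, config['output_size']) call is not defined in the excerpt. A minimal sketch consistent with the usage above, treating the first two arguments as a season length and an amplitude (both assumptions) and yielding an endless stream of noisy seasonal series:

import numpy as np

def data_generator(season_length, amplitude, output_size):
    """Sketch: yield noisy seasonal series (argument meanings assumed)."""
    t = np.arange(output_size)
    while True:
        phase = np.random.uniform(0, 2 * np.pi)
        level = np.random.uniform(5, 10)
        series = level + amplitude * np.sin(2 * np.pi * t / season_length + phase)
        yield series + np.random.normal(0, 0.1, output_size)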