Example #1
def Train(model):
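    # Combine inputs produced by other models (`others`) with the original
    # training data; `cfg`, `data`, and `train` come from the surrounding module.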
    reader_tr_xs = []
    # training data coming from the other models
    for other in others:
        reader = data.PathReader(join(other, 'train'), cfg.names_tr)
        reader_tr_xs.append(reader)
        
    reader_tr_x_origin = data.PathReader(cfg.path_train, cfg.names_tr)
    # this is the original training data
    reader_tr_xs.append(reader_tr_x_origin)
    reader_tr_y = data.PathReader(cfg.path_label, cfg.names_tr)
    gen_tr = data.DataGenerator(reader_tr_xs, reader_tr_y).GetGenerator()
        
    
    reader_val_xs = []
    # validation data coming from the other models
    for other in others:
        reader = data.PathReader(join(other, 'train'), cfg.names_val)
        reader_val_xs.append(reader)
    reader_val_x_origin = data.PathReader(cfg.path_train, cfg.names_val)
    # the original data, read with the validation split names
    reader_val_xs.append(reader_val_x_origin)
    reader_val_y = data.PathReader(cfg.path_label, cfg.names_val)
    gen_val = data.DataGenerator(reader_val_xs, reader_val_y).GetGenerator()
        
    train.Train(model, gen_tr, gen_val)
    model.save_weights(GetModelPath())
Example #2
def Train(model):
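    # Build train/validation generators from the configured paths, train the
    # model, and save its weights.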
    reader_tr_x = data.PathReader(cfg.path_train, cfg.names_tr)
    reader_tr_y = data.PathReader(cfg.path_label, cfg.names_tr)
    gen_tr = data.DataGenerator([reader_tr_x], reader_tr_y).GetGenerator()
    #if cfg.debug: data.DebugGenerator(gen_tr)

    reader_val_x = data.PathReader(cfg.path_train, cfg.names_val)
    reader_val_y = data.PathReader(cfg.path_label, cfg.names_val)
    gen_val = data.DataGenerator([reader_val_x], reader_val_y).GetGenerator()
    #if cfg.debug: data.DebugGenerator(gen_val)

    train.Train(model, gen_tr, gen_val)
    model.save_weights(GetModelPath())
Example #3
def train(tups):
    experiment = tups[0]
    dataset = tups[1]

    # Main loop: 5-fold cross-validation, reshuffling the split before every fold after the first

    k_fold = 5
    for k in range(0, k_fold):
        # Randomize train and test
        if k > 0:
            moveFiles.randomizeNumpy()
        
        # Train model
        m = TestModels(5, 'lrcn')
        outputEpochPath = os.path.join('output', 'model_checkpoints') 
        if not (os.path.isdir(outputEpochPath)):
            os.makedirs(outputEpochPath)

        filepath = os.path.join(outputEpochPath, "expLRCN{}-5,3,1-k{}.hdf5".format(experiment, k))
        checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

        gen = data.DataGenerator('train', batch_size = 1, useSequences=True)
        val = data.DataGenerator('test', batch_size = 1, useSequences=True)

        callbacks = [checkpoint]
        m.model.fit_generator(gen, validation_data = val, epochs=50, callbacks=callbacks) 
        
        # Evaluate and save: reload the best checkpoint written above
        m = load_model(filepath)
        
        x,y,yseq = dataset.all_data_from_npz('test')
        
        outputCSVPath = os.path.join('output', 'csv') 
        if not (os.path.isdir(outputCSVPath)):
            os.makedirs(outputCSVPath)
        
        tup = utils.nonNormalAccuracy(x, yseq, dataset, m)
        if experiment == 'standard':
            print(tup[0])
            print(tup[1])
            d = {'Non-Normal Accuracy': {'Acc': tup[0]}, 'Class Accuracy': tup[1]}
            df = pd.DataFrame(data=d)
            df.to_csv(os.path.join(outputCSVPath, "expLRCN{}-19-k{}.csv".format(experiment, k)))
        else:
            d = {'Non-Normal Accuracy': {'Acc': tup[0]}}
            df = pd.DataFrame(data=d)
            df.to_csv(os.path.join(outputCSVPath, "expLRCN{}-5,3,1-k{}.csv".format(experiment, k)))
Example #4
def main():
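    # Build the data pipeline and the UNET model, then train inside a TF session.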
    #datagen = data.DataGenerator(FLAGS.train_list, FLAGS.test_list, FLAGS.train_mask_dir, debug_dir=FLAGS.debug_dir)
    datagen = data.DataGenerator(FLAGS.train_list, FLAGS.test_list,
                                 FLAGS.train_mask_dir)
    model = unet.UNET(datagen,
                      out_mask_dir=FLAGS.out_mask_dir,
                      model_dir=FLAGS.model_dir)
    with tf.Session() as session:
        model.train(session)
Example #5
def f(n_source, n_target, prop_target, prop_source=0.5,
      labeled=True, d=3, distance=1, kernel_df=3, beta=3, iteration=0):
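    # Draw source/target samples with the given class proportions, estimate
    # the Bayes error, and return the excess risk for this iteration.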

    D = data.DataGenerator(d=d)
    x_source, y_source = D.getData(n_source, prop_source, distance=distance)
    x_target, y_target = D.getData(n_target, prop_target, distance=distance)
    x_test, y_test = D.getData(100, prop_target, distance=distance)
    bayes_error = D.bayes_error(prop=prop_target, distance=distance)
    parameter = beta, kernel_df, prop_target
    return_dict = setup.excess_risk(parameter, x_source, y_source, x_target,
                                    y_target, x_test, y_test, bayes_error,
                                    labeled=labeled)

    return_dict['iter'] = iteration
    return return_dict
Example #6
import os
import xml.dom.minidom

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchsummary import summary  # assumed: `summary` comes from torchsummary

import data     # project-local module
import seq2seq  # project-local module


def main():
    # ======================
    # Hyperparameters
    # ======================
    CELL = "gru"  # rnn, gru, lstm
    BATCH_SIZE = 64
    ENC_EMBED_SIZE = 128
    DEC_EMBED_SIZE = 128
    HIDDEN_DIM = 128
    NUM_LAYERS = 2
    DROPOUT_RATE = 0.0
    EPOCH = 200
    LEARNING_RATE = 0.01
    MAX_GENERATE_LENGTH = 20
    SAVE_EVERY = 5
    ATTENTION = True

    # print every hyperparameter setting defined above
    all_var = locals()
    print()
    for var in all_var:
        print("{0:15}   ".format(var), all_var[var])
    print()

    # ======================
    # Data
    # ======================
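    # Parse the TMX translation memory: each <tu> element holds an English
    # and a Croatian segment (en-hr).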
    DOMTree = xml.dom.minidom.parse('en-hr.tmx')
    collection = DOMTree.documentElement
    raw = list(collection.getElementsByTagName('tu'))
    raw_en = [
        raw[i].childNodes[1].childNodes[0].childNodes[0].data
        for i in range(len(raw))
    ]
    raw_hr = [
        raw[i].childNodes[3].childNodes[0].childNodes[0].data
        for i in range(len(raw))
    ]
    data_helper_en = data.DataHelper([raw_en])
    data_helper_hr = data.DataHelper([raw_hr])
    corpus = [data_helper_en.corpus, data_helper_hr.corpus]
    data_generator = data.DataGenerator(corpus)

    # ======================
    # Build the model
    # ======================
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = seq2seq.Seq2seq(cell=CELL,
                            enc_vocab_size=data_helper_en.vocab_size,
                            enc_embed_size=ENC_EMBED_SIZE,
                            enc_hidden_dim=HIDDEN_DIM,
                            num_layers=NUM_LAYERS,
                            dec_vocab_size=data_helper_hr.vocab_size,
                            dec_embed_size=DEC_EMBED_SIZE,
                            dropout_rate=DROPOUT_RATE,
                            use_attention=ATTENTION)
    model.to(device)
    summary(model, [(20, ), (20, )])
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)
    print()

    # ======================
    # Training and testing
    # ======================
    for epoch in range(EPOCH):
        generator_train = data_generator.train_generator(BATCH_SIZE)
        generator_test = data_generator.test_generator(BATCH_SIZE)
        train_loss = []
        while True:
            try:
                text = next(generator_train)
            except StopIteration:
                break
            text = text[0]
            optimizer.zero_grad()
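            # teacher forcing: the decoder input is the target shifted right;
            # the loss compares predictions against the target shifted left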
            x_enc = torch.from_numpy(text[0]).to(device)
            x_dec = torch.from_numpy(text[1][:, :-1]).to(device)
            y = model([x_enc, x_dec])
            loss = criterion(
                y.reshape(-1, data_helper_hr.vocab_size),
                torch.from_numpy(text[1][:, 1:]).reshape(-1).long().to(device))
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())

        test_loss = []
        while True:
            with torch.no_grad():
                try:
                    text = next(generator_test)
                except StopIteration:
                    break
                text = text[0]
                x_enc = torch.from_numpy(text[0]).to(device)
                x_dec = torch.from_numpy(text[1][:, :-1]).to(device)
                y = model([x_enc, x_dec])
                loss = criterion(
                    y.reshape(-1, data_helper_hr.vocab_size),
                    torch.from_numpy(
                        text[1][:, 1:]).reshape(-1).long().to(device))
                test_loss.append(loss.item())

        print('epoch {:d}   training loss {:.4f}    test loss {:.4f}'.format(
            epoch + 1, np.mean(train_loss), np.mean(test_loss)))

        if (epoch + 1) % SAVE_EVERY == 0:
            print('-----------------------------------------------------')
            print('saving parameters')
            os.makedirs('models', exist_ok=True)
            torch.save(model.state_dict(),
                       'models/seq2seq-' + str(epoch) + '.pkl')

            with torch.no_grad():
                # generate sample translations
                generator_test = data_generator.test_generator(3)
                text = next(generator_test)
                text = text[0]
                x = [
                    torch.from_numpy(text[0]).to(device),
                    torch.LongTensor([[data_helper_hr.w2i['_BOS']]] *
                                     3).to(device)
                ]
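                # autoregressive decoding: start from _BOS and append one
                # sampled token per step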
                for i in range(MAX_GENERATE_LENGTH):
                    samp = model.sample(x)
                    x[1] = torch.cat([x[1], samp], dim=1)
                x[1] = x[1].cpu().numpy()
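            # print the English source, the Croatian reference, and the model output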
            for i in range(x[0].shape[0]):
                print(' '.join([
                    data_helper_en.i2w[_] for _ in list(text[0][i, :])
                    if _ not in [
                        data_helper_en.w2i['_BOS'], data_helper_en.w2i['_EOS'],
                        data_helper_en.w2i['_PAD']
                    ]
                ]))
                print(' '.join([
                    data_helper_hr.i2w[_] for _ in list(text[1][i, :])
                    if _ not in [
                        data_helper_hr.w2i['_BOS'], data_helper_hr.w2i['_EOS'],
                        data_helper_hr.w2i['_PAD']
                    ]
                ]))
                print(' '.join([
                    data_helper_hr.i2w[_] for _ in list(x[1][i, :])
                    if _ not in [
                        data_helper_hr.w2i['_BOS'], data_helper_hr.w2i['_EOS'],
                        data_helper_hr.w2i['_PAD']
                    ]
                ]))
                print()
            print('-----------------------------------------------------')
Example #7
def run_trial(params, trial_num, write_path="/tmp/tf/verbs"):
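    """Build an Estimator, generate verb data, then train and/or predict for one trial."""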

    print("\n------ TRIAL {} -----".format(trial_num))

    tf.reset_default_graph()

    write_dir = "{}/trial_{}".format(write_path, trial_num)
    csv_file = "{}/trial_{}.csv".format(write_path, trial_num)

    # BUILD MODEL
    run_config = tf.estimator.RunConfig(
        save_checkpoints_steps=params["eval_steps"],
        save_checkpoints_secs=None,
        save_summary_steps=params["eval_steps"],
    )

    # TODO: moar models?
    model = tf.estimator.Estimator(model_fn=basic_ffnn,
                                   params=params,
                                   model_dir=write_dir,
                                   config=run_config)

    # GENERATE DATA
    generator = data.DataGenerator(
        params["verbs"],
        params["num_worlds"],
        params["max_cells"],
        params["items_per_bin"],
        params["tries_per_bin"],
        params["test_bin_size"],
    )

    train_x, train_y = generator.get_training_data()
    test_x, test_y = generator.get_test_data()

    # input fn for training
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={params["input_feature"]: train_x},
        y=train_y,
        batch_size=params["batch_size"],
        num_epochs=params["num_epochs"],
        shuffle=True,
    )

    # input fn for evaluation
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={params["input_feature"]: test_x},
        y=test_y,
        batch_size=len(test_x),
        shuffle=False,
    )

    if params["train"]:
        print("\n-- TRAINING --")
        # train and evaluate model together, using the Hook
        model.train(
            input_fn=train_input_fn,
            hooks=[
                EvalEarlyStopHook(
                    model,
                    eval_input_fn,
                    csv_file,
                    params["eval_steps"],
                    params["stop_loss"],
                )
            ],
        )

    if params["predict"]:
        print("\n-- PREDICTING --")
        predictions = pd.DataFrame(model.predict(input_fn=eval_input_fn))
        predictions["true_label"] = test_y
        predictions["correct"] = (
            predictions["class_ids"] == predictions["true_label"]).astype(int)
        predictions["dox_in_p"] = predictions["dox_in_p"].astype(int)
        predictions.to_csv("{}/trial_{}_predictions.csv".format(
            write_path, trial_num))
Example #8
import datetime as dt

import numpy as np
import plotting as pl
import matplotlib.pyplot as plt
import keras as k  # assumed alias: the snippet calls k.models.load_model
# `rd` is the project's data-reader module; its import is not shown in this snippet
from config import rawDataDir, processedDataDir, tfDataDir


print("----starting up-----")
modelName = "latestRadPredModel.h5"
maxBatchesPerEpoch = 100
batchSize = 4
timeSteps = int(5 * 60 / 5)



model = k.models.load_model(tfDataDir + modelName)
generator = rd.DataGenerator(processedDataDir, dt.datetime(2016, 6, 1), dt.datetime(2016, 6, 30), maxBatchesPerEpoch, batchSize, timeSteps, False)




def getMaxIndex(values):
    # indices where `values` reaches its maximum
    return np.where(values == np.amax(values))


print("----predicting----")
maxSamples = 300
i = 0
results = []
for dataIn, dataOut in generator:

    predictions = model.predict(dataIn)
Example #9
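# Either build 5-fold CV splits or use a single fixed train/validation split.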
if k_fold:
    train_data_set = data.kfold_dataset(labels, loss_weight, n_folds=5)
else:
    # split and shuffle data
    np.random.seed(2018)
    indexes = np.arange(len(labels))
    np.random.shuffle(indexes)
    train_indexes = indexes[:25500]
    valid_indexes = indexes[25500:]
    train_data_set = [[train_indexes, valid_indexes]]

for i, (train_indexes, test_indexes) in enumerate(train_data_set):
    print("Running Fold", i + 1)

    # Generators
    training_generator = data.DataGenerator(train_data_path, train_indexes,
                                            labels, **params)
    validation_generator = data.DataGenerator(train_data_path, test_indexes,
                                              labels, **params)

    checkpoint = ModelCheckpoint('best_val_f1.h5',
                                 monitor='val_f1',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=True,
                                 mode='max',
                                 period=1)

    #train_model.load_weights('weights/inceptionv3_lb0443.h5')

    # train model
    if n_gpu > 1: