def Train(model):
    reader_tr_xs = []  # training inputs coming from other models
    for other in others:
        reader = data.PathReader(join(other, 'train'), cfg.names_tr)
        reader_tr_xs.append(reader)
    reader_tr_x_origin = data.PathReader(cfg.path_train, cfg.names_tr)  # the original training data
    reader_tr_xs.append(reader_tr_x_origin)
    reader_tr_y = data.PathReader(cfg.path_label, cfg.names_tr)
    gen_tr = data.DataGenerator(reader_tr_xs, reader_tr_y).GetGenerator()

    reader_val_xs = []  # validation inputs coming from other models
    for other in others:
        reader = data.PathReader(join(other, 'train'), cfg.names_val)
        reader_val_xs.append(reader)
    reader_val_x_origin = data.PathReader(cfg.path_train, cfg.names_val)  # the original data, validation split
    reader_val_xs.append(reader_val_x_origin)
    reader_val_y = data.PathReader(cfg.path_label, cfg.names_val)
    gen_val = data.DataGenerator(reader_val_xs, reader_val_y).GetGenerator()

    train.Train(model, gen_tr, gen_val)
    model.save_weights(GetModelPath())
def Train(model):
    reader_tr_x = data.PathReader(cfg.path_train, cfg.names_tr)
    reader_tr_y = data.PathReader(cfg.path_label, cfg.names_tr)
    gen_tr = data.DataGenerator([reader_tr_x], reader_tr_y).GetGenerator()
    # if cfg.debug: data.DebugGenerator(gen_tr)

    reader_val_x = data.PathReader(cfg.path_train, cfg.names_val)
    reader_val_y = data.PathReader(cfg.path_label, cfg.names_val)
    gen_val = data.DataGenerator([reader_val_x], reader_val_y).GetGenerator()
    # if cfg.debug: data.DebugGenerator(gen_val)

    train.Train(model, gen_tr, gen_val)
    model.save_weights(GetModelPath())
def train(tups):
    experiment = tups[0]
    dataset = tups[1]

    # MAIN LOOP
    k_fold = 5
    for k in range(0, k_fold):
        # Randomize train and test
        if k > 0:
            moveFiles.randomizeNumpy()

        # Train model
        m = TestModels(5, 'lrcn')
        outputEpochPath = os.path.join('output', 'model_checkpoints')
        if not os.path.isdir(outputEpochPath):
            os.makedirs(outputEpochPath)
        filepath = os.path.join(outputEpochPath, "expLRCN{}-5,3,1-k{}.hdf5".format(experiment, k))
        checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                                                     save_best_only=True, mode='max')
        gen = data.DataGenerator('train', batch_size=1, useSequences=True)
        val = data.DataGenerator('test', batch_size=1, useSequences=True)
        callbacks = [checkpoint]
        m.model.fit_generator(gen, validation_data=val, epochs=50, callbacks=callbacks)

        # Evaluate and save
        path = os.path.join('output', 'model_checkpoints')
        m = load_model(os.path.join(path, "expLRCN{}-5,3,1-k{}.hdf5".format(experiment, k)))
        x, y, yseq = dataset.all_data_from_npz('test')
        outputCSVPath = os.path.join('output', 'csv')
        if not os.path.isdir(outputCSVPath):
            os.makedirs(outputCSVPath)
        if experiment == 'standard':
            tup = utils.nonNormalAccuracy(x, yseq, dataset, m)
            print(tup[0])
            print(tup[1])
            d = {'Non-Normal Accuracy': {'Acc': tup[0]}, 'Class Accuracy': tup[1]}
            df = pd.DataFrame(data=d)
            df.to_csv(os.path.join(outputCSVPath, "expLRCN{}-19-k{}.csv".format(experiment, k)))
        else:
            tup = utils.nonNormalAccuracy(x, yseq, dataset, m)
            d = {'Non-Normal Accuracy': {'Acc': tup[0]}}
            df = pd.DataFrame(data=d)
            df.to_csv(os.path.join(outputCSVPath, "expLRCN{}-5,3,1-k{}.csv".format(experiment, k)))
def main():
    # datagen = data.DataGenerator(FLAGS.train_list, FLAGS.test_list, FLAGS.train_mask_dir, debug_dir=FLAGS.debug_dir)
    datagen = data.DataGenerator(FLAGS.train_list, FLAGS.test_list, FLAGS.train_mask_dir)
    model = unet.UNET(datagen, out_mask_dir=FLAGS.out_mask_dir, model_dir=FLAGS.model_dir)
    with tf.Session() as session:
        model.train(session)
def f(n_source, n_target, prop_target, prop_source=0.5,
      labeled=True, d=3, distance=1, kernel_df=3, beta=3, iteration=0):
    D = data.DataGenerator(d=d)
    x_source, y_source = D.getData(n_source, prop_source, distance=distance)
    x_target, y_target = D.getData(n_target, prop_target, distance=distance)
    x_test, y_test = D.getData(100, prop_target, distance=distance)
    bayes_error = D.bayes_error(prop=prop_target, distance=distance)
    parameter = beta, kernel_df, prop_target
    return_dict = setup.excess_risk(parameter, x_source, y_source, x_target,
                                    y_target, x_test, y_test, bayes_error, labeled=labeled)
    return_dict['iter'] = iteration
    return return_dict
def main():
    # ======================
    # Hyperparameters
    # ======================
    CELL = "gru"  # rnn, gru, lstm
    BATCH_SIZE = 64
    ENC_EMBED_SIZE = 128
    DEC_EMBED_SIZE = 128
    HIDDEN_DIM = 128
    NUM_LAYERS = 2
    DROPOUT_RATE = 0.0
    EPOCH = 200
    LEARNING_RATE = 0.01
    MAX_GENERATE_LENGTH = 20
    SAVE_EVERY = 5
    ATTENTION = True

    all_var = locals()
    print()
    for var in all_var:
        if var != "var_name":
            print("{0:15} ".format(var), all_var[var])
    print()

    # ======================
    # Data
    # ======================
    DOMTree = xml.dom.minidom.parse('en-hr.tmx')
    collection = DOMTree.documentElement
    raw = list(collection.getElementsByTagName('tu'))
    raw_en = [
        raw[i].childNodes[1].childNodes[0].childNodes[0].data
        for i in range(len(raw))
    ]
    raw_hr = [
        raw[i].childNodes[3].childNodes[0].childNodes[0].data
        for i in range(len(raw))
    ]
    data_helper_en = data.DataHelper([raw_en])
    data_helper_hr = data.DataHelper([raw_hr])
    corpus = [data_helper_en.corpus, data_helper_hr.corpus]
    data_generator = data.DataGenerator(corpus)

    # ======================
    # Build the model
    # ======================
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = seq2seq.Seq2seq(cell=CELL,
                            enc_vocab_size=data_helper_en.vocab_size,
                            enc_embed_size=ENC_EMBED_SIZE,
                            enc_hidden_dim=HIDDEN_DIM,
                            num_layers=NUM_LAYERS,
                            dec_vocab_size=data_helper_hr.vocab_size,
                            dec_embed_size=DEC_EMBED_SIZE,
                            dropout_rate=DROPOUT_RATE,
                            use_attention=ATTENTION)
    model.to(device)
    summary(model, [(20, ), (20, )])
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)
    print()

    # ======================
    # Training and evaluation
    # ======================
    for epoch in range(EPOCH):
        generator_train = data_generator.train_generator(BATCH_SIZE)
        generator_test = data_generator.test_generator(BATCH_SIZE)

        train_loss = []
        while True:
            try:
                text = generator_train.__next__()
            except StopIteration:
                break
            text = text[0]
            optimizer.zero_grad()
            x_enc = torch.from_numpy(text[0]).to(device)
            x_dec = torch.from_numpy(text[1][:, :-1]).to(device)
            y = model([x_enc, x_dec])
            loss = criterion(
                y.reshape(-1, data_helper_hr.vocab_size),
                torch.from_numpy(text[1][:, 1:]).reshape(-1).long().to(device))
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())

        test_loss = []
        while True:
            with torch.no_grad():
                try:
                    text = generator_test.__next__()
                except StopIteration:
                    break
                text = text[0]
                x_enc = torch.from_numpy(text[0]).to(device)
                x_dec = torch.from_numpy(text[1][:, :-1]).to(device)
                y = model([x_enc, x_dec])
                loss = criterion(
                    y.reshape(-1, data_helper_hr.vocab_size),
                    torch.from_numpy(
                        text[1][:, 1:]).reshape(-1).long().to(device))
                test_loss.append(loss.item())

        print('epoch {:d} training loss {:.4f} test loss {:.4f}'.format(
            epoch + 1, np.mean(train_loss), np.mean(test_loss)))

        if (epoch + 1) % SAVE_EVERY == 0:
            print('-----------------------------------------------------')
            print('saving parameters')
            os.makedirs('models', exist_ok=True)
            torch.save(model.state_dict(),
                       'models/seq2seq-' + str(epoch) + '.pkl')

            with torch.no_grad():
                # Generate sample text
                generator_test = data_generator.test_generator(3)
                text = generator_test.__next__()
                text = text[0]
                x = [
                    torch.from_numpy(text[0]).to(device),
                    torch.LongTensor([[data_helper_hr.w2i['_BOS']]] * 3).to(device)
                ]
                for i in range(MAX_GENERATE_LENGTH):
                    samp = model.sample(x)
                    x[1] = torch.cat([x[1], samp], dim=1)
                x[1] = x[1].cpu().numpy()

                for i in range(x[0].shape[0]):
                    # source sentence, reference translation, model output
                    print(' '.join([
                        data_helper_en.i2w[_] for _ in list(text[0][i, :])
                        if _ not in [
                            data_helper_en.w2i['_BOS'],
                            data_helper_en.w2i['_EOS'],
                            data_helper_en.w2i['_PAD']
                        ]
                    ]))
                    print(' '.join([
                        data_helper_hr.i2w[_] for _ in list(text[1][i, :])
                        if _ not in [
                            data_helper_hr.w2i['_BOS'],
                            data_helper_hr.w2i['_EOS'],
                            data_helper_hr.w2i['_PAD']
                        ]
                    ]))
                    print(' '.join([
                        data_helper_hr.i2w[_] for _ in list(x[1][i, :])
                        if _ not in [
                            data_helper_hr.w2i['_BOS'],
                            data_helper_hr.w2i['_EOS'],
                            data_helper_hr.w2i['_PAD']
                        ]
                    ]))
                    print()

            print('-----------------------------------------------------')
def run_trial(params, trial_num, write_path="/tmp/tf/verbs"):
    print("\n------ TRIAL {} -----".format(trial_num))
    tf.reset_default_graph()
    write_dir = "{}/trial_{}".format(write_path, trial_num)
    csv_file = "{}/trial_{}.csv".format(write_path, trial_num)

    # BUILD MODEL
    run_config = tf.estimator.RunConfig(
        save_checkpoints_steps=params["eval_steps"],
        save_checkpoints_secs=None,
        save_summary_steps=params["eval_steps"],
    )
    # TODO: more models?
    model = tf.estimator.Estimator(model_fn=basic_ffnn,
                                   params=params,
                                   model_dir=write_dir,
                                   config=run_config)

    # GENERATE DATA
    generator = data.DataGenerator(
        params["verbs"],
        params["num_worlds"],
        params["max_cells"],
        params["items_per_bin"],
        params["tries_per_bin"],
        params["test_bin_size"],
    )
    train_x, train_y = generator.get_training_data()
    test_x, test_y = generator.get_test_data()

    # input fn for training
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={params["input_feature"]: train_x},
        y=train_y,
        batch_size=params["batch_size"],
        num_epochs=params["num_epochs"],
        shuffle=True,
    )
    # input fn for evaluation
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={params["input_feature"]: test_x},
        y=test_y,
        batch_size=len(test_x),
        shuffle=False,
    )

    if params["train"]:
        print("\n-- TRAINING --")
        # train and evaluate the model together, using the hook
        model.train(
            input_fn=train_input_fn,
            hooks=[
                EvalEarlyStopHook(
                    model,
                    eval_input_fn,
                    csv_file,
                    params["eval_steps"],
                    params["stop_loss"],
                )
            ],
        )

    if params["predict"]:
        print("\n-- PREDICTING --")
        predictions = pd.DataFrame(model.predict(input_fn=eval_input_fn))
        predictions["true_label"] = test_y
        predictions["correct"] = (
            predictions["class_ids"] == predictions["true_label"]).astype(int)
        predictions["dox_in_p"] = predictions["dox_in_p"].astype(int)
        predictions.to_csv("{}/trial_{}_predictions.csv".format(
            write_path, trial_num))
import numpy as np
import datetime as dt  # for dt.datetime(...) below
import keras as k      # for k.models.load_model(...) below
import plotting as pl
import matplotlib.pyplot as plt
from config import rawDataDir, processedDataDir, tfDataDir
# NOTE: `rd` (which provides rd.DataGenerator) is a project-specific module;
# its import statement is not shown in the original snippet.

print("----starting up-----")

modelName = "latestRadPredModel.h5"
maxBatchesPerEpoch = 100
batchSize = 4
timeSteps = int(5 * 60 / 5)

model = k.models.load_model(tfDataDir + modelName)
generator = rd.DataGenerator(processedDataDir,
                             dt.datetime(2016, 6, 1),
                             dt.datetime(2016, 6, 30),
                             maxBatchesPerEpoch, batchSize, timeSteps, False)


def getMaxIndex(values):
    # index of the maximum value (renamed arg to avoid shadowing the built-in `list`)
    return np.where(values == np.amax(values))


print("----predicting----")
maxSamples = 300
i = 0
results = []
for dataIn, dataOut in generator:
    predictions = model.predict(dataIn)
if k_fold:
    train_data_set = data.kfold_dataset(labels, loss_weight, n_folds=5)
else:
    # split and shuffle the data
    np.random.seed(2018)
    indexes = np.arange(len(labels))
    np.random.shuffle(indexes)
    train_indexes = indexes[:25500]
    valid_indexes = indexes[25500:]
    train_data_set = [[train_indexes, valid_indexes]]

for i, (train_indexes, test_indexes) in enumerate(train_data_set):
    print("Running Fold", i + 1)

    # Generators
    training_generator = data.DataGenerator(train_data_path, train_indexes, labels, **params)
    validation_generator = data.DataGenerator(train_data_path, test_indexes, labels, **params)

    checkpoint = ModelCheckpoint('best_val_f1.h5',
                                 monitor='val_f1',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=True,
                                 mode='max',
                                 period=1)
    # train_model.load_weights('weights/inceptionv3_lb0443.h5')

    # train model
    if n_gpu > 1: