def train(train, dev, model, model_dir, batch_size, glove, beam_size, samples_per_epoch, val_samples, cmodel, epochs):
    """Fit the generative model, checkpointing the best weights and logging
    per-epoch history, with generation-based validation after each epoch.

    Returns the Keras History object from fit_generator.
    """
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Hypothesis length excludes the leading token of the 'hypo_input' layer.
    hypo_len = model.get_layer('hypo_input').input_shape[1] - 1
    noise_layer = model.get_layer('noise_embeddings')
    vae_layer = model.get_layer('vae_output')

    batches = train_generator(train, batch_size, hypo_len,
                              'class_input' in model.input_names,
                              noise_layer, vae_layer)

    # Keep only the single best checkpoint, judged by hypothesis loss.
    checkpoint = ModelCheckpoint(model_dir + '/weights.hdf5',
                                 monitor='hypo_loss', mode='min',
                                 save_best_only=True)
    history_log = CsvHistory(model_dir + '/history.csv')

    test_model = gm.gen_test(model, glove, batch_size)
    # Noise dimensionality comes from the noise-embedding layer when present,
    # otherwise from the input of the 'expansion' layer.
    if noise_layer:
        noise_size = noise_layer.output_shape[-1]
    else:
        noise_size = model.get_layer('expansion').input_shape[-1]

    validator = ValidateGen(dev, test_model, beam_size, hypo_len, val_samples,
                            noise_size, glove, cmodel, True, True)

    return model.fit_generator(batches,
                               samples_per_epoch=samples_per_epoch,
                               nb_epoch=epochs,
                               callbacks=[validator, checkpoint, history_log])
def train(train, dev, model, model_dir, batch_size, glove, beam_size, samples_per_epoch, val_samples, cmodel, epochs):
    """Run generator training: builds the batch generator and callback set,
    then delegates to model.fit_generator for the requested epochs."""
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # The 'hypo_input' layer carries one extra leading position.
    hypo_len = model.get_layer('hypo_input').input_shape[1] - 1
    ne = model.get_layer('noise_embeddings')
    vae = model.get_layer('vae_output')
    uses_class_input = 'class_input' in model.input_names

    data_gen = train_generator(train, batch_size, hypo_len,
                               uses_class_input, ne, vae)

    # Width of the noise vector: prefer the noise-embedding output,
    # fall back to the 'expansion' layer input when no such layer exists.
    noise_size = (ne.output_shape[-1] if ne
                  else model.get_layer('expansion').input_shape[-1])

    gen_model = gm.gen_test(model, glove, batch_size)
    callbacks = [
        ValidateGen(dev, gen_model, beam_size, hypo_len, val_samples,
                    noise_size, glove, cmodel, True, True),
        # Only the best weights (lowest hypothesis loss) are retained.
        ModelCheckpoint(model_dir + '/weights.hdf5', monitor='hypo_loss',
                        mode='min', save_best_only=True),
        CsvHistory(model_dir + '/history.csv'),
    ]

    hist = model.fit_generator(data_gen,
                               samples_per_epoch=samples_per_epoch,
                               nb_epoch=epochs,
                               callbacks=callbacks)
    return hist
cmodel = cm.attention_model(c_hidden_size, glove) if os.path.exists(orig_cmodel_dir): cmodel.load_weights(orig_cmodel_dir + 'model.weights') if method == 'orig_class': ca.train(train, dev, cmodel, orig_cmodel_dir, batch_size) if method == 'train_gen': gtrain = gm.gen_train(len(train[0]), g_hidden_size, latent_size, glove, hypo_len, version) ga.train(train, dev, gtrain, dir_name, batch_size, glove, beam_size, epoch_size, dev_sample_size, cmodel, gen_epochs) if method == 'augment': gtrain = gm.gen_train(len(train[0]), g_hidden_size, latent_size, glove, hypo_len, version) gtrain.load_weights(dir_name + '/weights.hdf5') gtest = gm.gen_test(gtrain, glove, batch_size) augment.new_generate_save(dev, dir_name, augment_file_size, gtest, beam_size, hypo_len, latent_size, cmodel, wi, 'dev', len(dev[0]), aug_threshold) augment.new_generate_save(train, dir_name, augment_file_size, gtest, beam_size, hypo_len, latent_size, cmodel, wi, 'train', len(train[0]), aug_threshold) if method == 'train_class': for t in thresholds: if type(t) == str and t[0] == 'a': aug_train, aug_dev = augment.load_dataset(dir_name, True, 2**30, 2**30, wi, prem_len, hypo_len) aug_dev = aa.filter_adverserial(aug_dev, t, len(dev[0]), dir_name, glove, a_hidden_size) aug_train = aa.filter_adverserial(aug_train, t, len(train[0]), dir_name, glove, a_hidden_size) else: aug_train, aug_dev = augment.load_dataset(dir_name, t, len(train[0]), len(dev[0]), wi, prem_len, hypo_len) aug_cmodel = cm.attention_model(c_hidden_size, glove)
g_hidden_size) + '-' + str(latent_size) cmodel = cm.attention_model(c_hidden_size, glove) cmodel.load_weights('models/cmodel/model.weights') if method == 'train_gen': gtrain = gm.gen_train(len(train[0]), g_hidden_size, latent_size, glove, hypo_len, version) ga.train(train, dev, gtrain, dir_name, batch_size, glove, beam_size, epoch_size, dev_sample_size, cmodel, gen_epochs) if method == 'augment': gtrain = gm.gen_train(len(train[0]), g_hidden_size, latent_size, glove, hypo_len, version) gtrain.load_weights(dir_name + '/weights.hdf5') gtest = gm.gen_test(gtrain, glove, batch_size) augment.new_generate_save(dev, dir_name, augment_file_size, gtest, beam_size, hypo_len, latent_size, cmodel, wi, 'dev', len(dev[0]), aug_threshold) augment.new_generate_save(train, dir_name, augment_file_size, gtest, beam_size, hypo_len, latent_size, cmodel, wi, 'train', len(train[0]), aug_threshold) if method == 'train_class': for t in thresholds: if type(t) == str and t[0] == 'a': aug_train, aug_dev = augment.load_dataset( dir_name, True, 2**30, 2**30, wi, prem_len, hypo_len) aug_dev = aa.filter_adverserial(aug_dev, t, len(dev[0]), dir_name, glove, a_hidden_size) aug_train = aa.filter_adverserial(aug_train, t, len(train[0]),