def main(args):
    token_to_index, index_to_token = Vocabulary.load(args.vocab_file)
    root, _ = os.path.splitext(args.vocab_file)
    basepath, basename = os.path.split(root)
    embed_path = f'{basepath}/embedding_{basename}.npy'
    embeddings = np.load(embed_path) if os.path.exists(embed_path) else None
    model = FastQA(len(token_to_index), args.embed, args.hidden,
                   question_limit=args.q_len, context_limit=args.c_len,
                   dropout=args.dropout, pretrained_embeddings=embeddings,
                   with_feature=not args.without_feature).build()
    opt = Adam()
    model.compile(optimizer=opt,
                  loss_weights=[1, 1, 0, 0],
                  loss=['sparse_categorical_crossentropy',
                        'sparse_categorical_crossentropy', None, None])
    train_dataset = SquadReader(args.train_path)
    dev_dataset = SquadReader(args.dev_path)
    tokenizer = get_tokenizer(lower=args.lower, as_str=False)
    converter = SquadConverter(token_to_index, PAD_TOKEN, UNK_TOKEN, tokenizer,
                               question_max_len=args.q_len,
                               context_max_len=args.c_len)
    eval_converter = SquadEvalConverter(
        token_to_index, PAD_TOKEN, UNK_TOKEN, tokenizer,
        question_max_len=args.q_len, context_max_len=args.c_len)
    train_generator = Iterator(train_dataset, args.batch, converter)
    dev_generator_loss = Iterator(dev_dataset, args.batch, converter,
                                  shuffle=False)
    dev_generator_f1 = Iterator(dev_dataset, args.batch, eval_converter,
                                repeat=False, shuffle=False)
    trainer = SquadTrainer(model, train_generator, args.epoch,
                           dev_generator_loss,
                           './models/fastqa.{epoch:02d}-{val_loss:.2f}.h5')
    trainer.add_callback(FastQALRScheduler(
        dev_generator_f1, val_answer_file=args.answer_path, steps=args.steps))
    trainer.add_callback(FastQACheckpoint('./models/fastqa.{steps:06d}.h5',
                                          steps=args.steps))
    if args.use_tensorboard:
        trainer.add_callback(TensorBoard(log_dir='./graph',
                                         batch_size=args.batch))
    history = trainer.run()
    dump_graph(history, 'loss_graph.png')
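# Minimal sketch of a command-line parser that could feed the FastQA main()
# above. It is assembled only from the args.* attributes the function reads;
# the flag names, defaults, and help text here are assumptions and may differ
# from the repository's actual parser.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Train FastQA on SQuAD')
    parser.add_argument('--vocab-file', dest='vocab_file', required=True)
    parser.add_argument('--train-path', dest='train_path', required=True)
    parser.add_argument('--dev-path', dest='dev_path', required=True)
    parser.add_argument('--answer-path', dest='answer_path', required=True)
    parser.add_argument('--embed', type=int, default=300)
    parser.add_argument('--hidden', type=int, default=300)
    parser.add_argument('--q-len', dest='q_len', type=int, default=50)
    parser.add_argument('--c-len', dest='c_len', type=int, default=400)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch', type=int, default=32)
    parser.add_argument('--epoch', type=int, default=30)
    parser.add_argument('--steps', type=int, default=1000)
    parser.add_argument('--lower', action='store_true')
    parser.add_argument('--without-feature', dest='without_feature',
                        action='store_true')
    parser.add_argument('--use-tensorboard', dest='use_tensorboard',
                        action='store_true')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())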
def setUp(self):
    dataset = range(100)
    self.batch_size = 32

    def converter(x):
        return x

    self.generator1 = Iterator(dataset, self.batch_size, converter)
    self.generator2 = Iterator(dataset, self.batch_size, converter, False, False)
    self.dataset = dataset
    self.converter = converter
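# Hypothetical test method built on the fixture above. It assumes the two
# positional False flags on generator2 mean repeat=False, shuffle=False (as in
# the training scripts), that iterating the Iterator yields converted batches,
# and that the identity converter returns the batch unchanged. None of this is
# verified here; it is a sketch of how the fixture might be exercised.
def test_non_repeating_iterator_covers_dataset(self):
    # One pass over the non-repeating iterator should visit every element once.
    total = sum(len(batch) for batch in self.generator2)
    self.assertEqual(total, len(self.dataset))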
def main(args):
    token_to_index, index_to_token = Vocabulary.load(args.vocab_file)
    root, _ = os.path.splitext(args.vocab_file)
    basepath, basename = os.path.split(root)
    embed_path = f'{basepath}/embedding_{basename}.npy'
    embeddings = np.load(embed_path) if os.path.exists(embed_path) else None
    batch_size = args.batch  # Batch size for training.
    epochs = args.epoch  # Number of epochs to train for.
    converter = SquadDepConverter(token_to_index, PAD_TOKEN, UNK_TOKEN)
    if args.model == 'qanet':
        model = DependencyQANet(len(token_to_index), args.embed,
                                len(converter._dep_to_index), args.hidden,
                                args.num_heads, dropout=args.dropout,
                                num_blocks=args.encoder_layer,
                                num_convs=args.encoder_conv,
                                embeddings=embeddings).build()
    elif args.model == 'lstm':
        model = DependencyLSTM(len(token_to_index), args.embed,
                               len(converter._dep_to_index), args.hidden,
                               dropout=args.dropout,
                               embeddings=embeddings).build()
    opt = Adam(lr=0.001, beta_1=0.8, beta_2=0.999, epsilon=1e-7, clipnorm=5.)
    model.compile(optimizer=opt,
                  loss=['sparse_categorical_crossentropy'],
                  metrics=['sparse_categorical_accuracy'])
    train_dataset = SquadReader(args.train_path)
    dev_dataset = SquadReader(args.dev_path)
    train_generator = Iterator(train_dataset, batch_size, converter)
    dev_generator = Iterator(dev_dataset, batch_size, converter)
    trainer = SquadTrainer(model, train_generator, epochs, dev_generator,
                           './model/dep.{epoch:02d}-{val_loss:.2f}.h5')
    trainer.add_callback(BatchLearningRateScheduler())
    trainer.add_callback(ExponentialMovingAverage(0.999))
    if args.use_tensorboard:
        trainer.add_callback(
            TensorBoard(log_dir='./graph', batch_size=batch_size))
    history = trainer.run()
    dump_graph(history, 'loss_graph.png')
    test_dataset = SquadReader(args.test_path)
    test_generator = Iterator(test_dataset, args.batch, converter, False, False)
    print(model.evaluate_generator(test_generator, steps=len(test_generator)))
def main(args):
    token_to_index, _ = Vocabulary.load(args.vocab_file)
    model = FastQA(len(token_to_index), args.embed, args.hidden,
                   question_limit=args.q_len, context_limit=args.c_len,
                   with_feature=not args.without_feature).build()
    model.load_weights(args.model_path)
    test_dataset = SquadReader(args.test_path)
    tokenizer = get_tokenizer(lower=args.lower, as_str=False)
    converter = SquadEvalConverter(token_to_index, PAD_TOKEN, UNK_TOKEN,
                                   tokenizer,
                                   question_max_len=args.q_len,
                                   context_max_len=args.c_len)
    test_generator = Iterator(test_dataset, args.batch, converter, False, False)
    predictions = {}
    for inputs, (contexts, ids) in test_generator:
        _, _, start_indices, end_indices = model.predict_on_batch(inputs)
        for i, (start, end) in enumerate(zip(start_indices, end_indices)):
            prediction = ' '.join(contexts[i][j] for j in range(start, end + 1))
            predictions[ids[i]] = prediction
    basename = osp.splitext(osp.basename(args.model_path))[0]
    save_path = osp.join(args.save_dir, f'predictions_{basename}.json')
    with open(save_path, 'w') as f:
        json.dump(predictions, f, indent=2)
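# The dump written above maps each question id to the predicted answer text,
# the same {id: answer} shape the official SQuAD v1.1 evaluation script
# expects. A quick sanity check might look like this sketch; the file name
# shown is made up for illustration.
import json

with open('predictions_fastqa.30-1.50.json') as f:  # hypothetical file name
    predictions = json.load(f)

for qid, answer in list(predictions.items())[:3]:
    print(qid, '->', answer)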
def main(args):
    token_to_index, index_to_token = Vocabulary.load(args.vocab_file)
    root, _ = os.path.splitext(args.vocab_file)
    basepath, basename = os.path.split(root)
    embed_path = f'{basepath}/embedding_{basename}.npy'
    embeddings = np.load(embed_path) if os.path.exists(embed_path) else None
    batch_size = args.batch  # Batch size for training.
    epochs = args.epoch  # Number of epochs to train for.
    model = QANet(len(token_to_index), args.embed, args.hidden, args.num_heads,
                  encoder_num_blocks=args.encoder_layer,
                  encoder_num_convs=args.encoder_conv,
                  output_num_blocks=args.output_layer,
                  output_num_convs=args.output_conv,
                  dropout=args.dropout,
                  embeddings=embeddings).build()
    opt = Adam(lr=0.001, beta_1=0.8, beta_2=0.999, epsilon=1e-7, clipnorm=5.)
    model.compile(optimizer=opt,
                  loss=[
                      'sparse_categorical_crossentropy',
                      'sparse_categorical_crossentropy', None, None
                  ],
                  loss_weights=[1, 1, 0, 0])
    train_dataset = SquadReader(args.train_path)
    dev_dataset = SquadReader(args.dev_path)
    converter = SquadConverter(token_to_index, PAD_TOKEN, UNK_TOKEN,
                               lower=args.lower)
    train_generator = Iterator(train_dataset, batch_size, converter)
    dev_generator = Iterator(dev_dataset, batch_size, converter)
    trainer = SquadTrainer(model, train_generator, epochs, dev_generator,
                           './model/qanet.{epoch:02d}-{val_loss:.2f}.h5')
    trainer.add_callback(BatchLearningRateScheduler())
    # trainer.add_callback(ExponentialMovingAverage(0.999))
    if args.use_tensorboard:
        trainer.add_callback(
            TensorBoard(log_dir='./graph', batch_size=batch_size))
    history = trainer.run()
    dump_graph(history, 'loss_graph.png')
def thread_run(path, search, config, col_type, dataset, sampleset):
    if config["rand_search"] == "yes":
        param = parameter_search(config["model"])
    else:
        param = config["param"]
    with open(path + "exp_params.json", "a") as f:
        json.dump(param, f)
        f.write("\n")
    model = config["model"]
    train_method = config["train_method"]
    if train_method in ("CTrain", "CTrain_dp", "CTrain_nofair"):
        labels = config["label"]
    else:
        labels = None
    if model == "DCGAN":
        square = True
        pad = 0
    else:
        square = False
        pad = None
    # print(dataset.col_ind)
    # print(sampleset.col_ind)
    # print(labels)
    train_it, sample_it = Iterator.split(
        batch_size=param["batch_size"],
        train=dataset,
        validation=sampleset,
        sort_key=None,
        shuffle=True,
        labels=labels,
        square=square,
        pad=pad
    )
    x_dim = train_it.data.shape[1]
    col_dim = dataset.col_dim
    col_ind = dataset.col_ind
    if train_method in ("CTrain", "CTrain_dp", "CTrain_nofair"):
        c_dim = train_it.label.shape[1]
        condition = True
    else:
        c_dim = 0
        condition = False
    if model == "VGAN":
        gen = VGAN_generator(param["z_dim"], param["gen_hidden_dim"], x_dim,
                             param["gen_num_layers"], col_type, col_ind,
                             condition=condition, c_dim=c_dim)
        if "dis_model" in config.keys():
            if config["dis_model"] == "lstm":
                dis = LSTM_discriminator(x_dim, param["dis_lstm_dim"],
                                         condition, c_dim)
            elif config["dis_model"] == "mlp":
                dis = VGAN_discriminator(x_dim, param["dis_hidden_dim"],
                                         param["dis_num_layers"], condition, c_dim)
        else:
            dis = VGAN_discriminator(x_dim, param["dis_hidden_dim"],
                                     param["dis_num_layers"], condition, c_dim)
    elif model == "LGAN":
        gen = LGAN_generator(param["z_dim"], param["gen_feature_dim"],
                             param["gen_lstm_dim"], col_dim, col_type,
                             condition, c_dim)
        if "dis_model" in config.keys():
            if config["dis_model"] == "lstm":
                dis = LSTM_discriminator(x_dim, param["dis_lstm_dim"],
                                         condition, c_dim)
            elif config["dis_model"] == "mlp":
                dis = LGAN_discriminator(x_dim, param["dis_hidden_dim"],
                                         param["dis_num_layers"], condition, c_dim)
        else:
            dis = LGAN_discriminator(x_dim, param["dis_hidden_dim"],
                                     param["dis_num_layers"], condition, c_dim)
    # elif model == "DCGAN":
    #     gen = DCGAN_generator(param["z_dim"], train_it.shape, 2, col_type)
    #     dis = DCGAN_discriminator(train_it.shape, 2)
    print(gen)
    print(dis)
    GPU = torch.cuda.is_available()
    if "sample_times" in config.keys():
        sample_times = config["sample_times"]
    else:
        sample_times = 1
    if train_method == "VTrain":
        print((c_dim, condition, x_dim))
        KL = True
        if "KL" in config.keys():
            KL = True if config["KL"] == "yes" else False
        V_Train(search, path, sample_it, gen, dis, config["n_epochs"],
                param["lr"], train_it, param["z_dim"], dataset, col_type,
                sample_times, itertimes=100,
                steps_per_epoch=config["steps_per_epoch"], GPU=GPU, KL=KL)
    elif train_method == "CTrain":
        print((c_dim, condition, x_dim))
        print(train_it.label.shape)
        C_Train(search, path, sample_it, gen, dis, config["n_epochs"],
                param["lr"], train_it, param["z_dim"], dataset, col_type,
                sample_times, itertimes=100,
                steps_per_epoch=config["steps_per_epoch"], GPU=GPU)
    elif train_method == "WTrain":
        dis.wgan = True
        KL = True
        if "KL" in config.keys():
            KL = True if config["KL"] == "yes" else False
        W_Train(search, path, sample_it, gen, dis, config["ng"], config["nd"],
                0.01, param["lr"], train_it, param["z_dim"], dataset, col_type,
                sample_times, itertimes=100, GPU=GPU, KL=KL)
    elif train_method == "CTrain_dp":
        dis.wgan = True
        C_Train_dp(search, path, sample_it, gen, dis, config["ng"], config["nd"],
                   0.01, param["lr"], train_it, param["z_dim"], dataset, col_type,
                   config["eps"], sample_times, itertimes=100, GPU=GPU)
    elif train_method == "CTrain_nofair":
        C_Train_nofair(search, path, sample_it, gen, dis, config["n_epochs"],
                       param["lr"], train_it, param["z_dim"], dataset, col_type,
                       sample_times, itertimes=100,
                       steps_per_epoch=config["steps_per_epoch"], GPU=GPU)
def thread_run(path, search, config, source_dst, target_dst, GPU):
    if config["rand_search"] == "yes":
        param = parameter_search(gen_model=config["gen_model"])
    else:
        param = config["param"]
    with open(path + "exp_params.json", "a") as f:
        json.dump(param, f)
        f.write("\n")
    source_it = Iterator(dataset=source_dst, batch_size=param["batch_size"],
                         shuffle=False, labels=config["labels"],
                         mask=config["source_mask"])
    target_it = Iterator(dataset=target_dst, batch_size=param["batch_size"],
                         shuffle=False, labels=config["labels"],
                         mask=config["target_mask"])
    x_dim = source_it.data.shape[1]
    col_ind = source_dst.col_ind
    col_dim = source_dst.col_dim
    col_type = source_dst.col_type
    mask_dim = target_it.masks.shape[1]
    if config["Gm"] == "yes":
        mask_gen = MaskGenerator_MLP(param["z_dim"], x_dim,
                                     param["mask_gen_hidden_dims"], mask_dim)
        mask_dis = Discriminator(mask_dim, param["mask_dis_hidden_dims"],
                                 c_dim=x_dim, condition=True)
    else:
        mask_gen = None
        mask_dis = None
    if config["Gx"] == "yes":
        if config["gen_model"] == "LSTM":
            obs_gen = ObservedGenerator_LSTM(param["z_dim"],
                                             param["obs_gen_feature_dim"],
                                             param["obs_gen_lstm_dim"],
                                             col_dim, col_type, col_ind,
                                             x_dim, mask_dim)
        elif config["gen_model"] == "MLP":
            obs_gen = ObservedGenerator_MLP(param["z_dim"],
                                            param["obs_gen_hidden_dims"],
                                            x_dim, mask_dim, col_type, col_ind)
    else:
        obs_gen = None
    obs_dis = Discriminator(x_dim, param["obs_dis_hidden_dims"])
    print(mask_gen)
    print(mask_dis)
    print(obs_gen)
    print(obs_dis)
    handler = Handler(source_it, target_it, source_dst, path)
    if mask_gen is None and obs_gen is None:
        handler.translate(mask_gen, obs_gen, param["z_dim"],
                          path + "translate_{}".format(search),
                          GPU=True, repeat=1)
    else:
        mask_gen, obs_gen, mask_dis, obs_dis = handler.train(
            mask_gen, obs_gen, mask_dis, obs_dis, param, config, search,
            GPU=GPU)
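# As with the tabular-GAN runner above, a hypothetical config for this
# mask/observed-value translation variant might look like the following. Keys
# mirror the lookups inside thread_run; the values, file names, and hidden-dim
# lists are placeholders and may not match the repository's real configs.
config = {
    "rand_search": "no",
    "gen_model": "MLP",                 # "MLP" or "LSTM" observed-value generator
    "Gm": "yes",                        # build a mask generator/discriminator
    "Gx": "yes",                        # build an observed-value generator
    "labels": None,
    "source_mask": "source_mask.csv",   # placeholder mask specification
    "target_mask": "target_mask.csv",
    "param": {                          # used when rand_search == "no"
        "batch_size": 64, "z_dim": 100,
        "mask_gen_hidden_dims": [100, 100],
        "mask_dis_hidden_dims": [100, 100],
        "obs_gen_hidden_dims": [100, 100],  # consumed by the MLP generator
        "obs_dis_hidden_dims": [100, 100],
    },
}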