def main():
    tokenizer = get_tokenizer(args.bert_vocab_path)
    train_data, dev_data, test_data, id2rel, rel2id, num_rels = load_data(
        args.train_path, args.dev_path, args.test_path, args.rel_dict_path)
    subject_model, object_model, hbt_model = E2EModel(
        args.bert_config_path, args.bert_checkpoint_path, args.LR, num_rels)

    # tensorflow: allocate GPU memory on demand
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    if K.backend() == 'tensorflow':
        import tensorflow as tf
        from keras.backend.tensorflow_backend import set_session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        set_session(sess)

    STEPS = len(train_data) // args.BATCH_SIZE
    data_manager = data_generator(train_data, tokenizer, rel2id, num_rels,
                                  args.MAX_LEN, args.BATCH_SIZE)
    evaluator = Evaluate(subject_model, object_model, tokenizer, id2rel,
                         dev_data, args.save_weights_path, args.save_model_path)
    hbt_model.fit_generator(data_manager.__iter__(),
                            steps_per_epoch=STEPS,
                            epochs=args.EPOCH,
                            callbacks=[evaluator])
    print("model training finished")
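# A minimal sketch of the Keras-style generator interface the fit_generator call
# above assumes: an object whose __iter__ yields (inputs, targets) batches forever.
# The real data_generator is project-specific; the class below and its random
# batching are a hypothetical illustration only.
import numpy as np

class toy_data_generator:
    def __init__(self, data, batch_size):
        self.data = data              # list of (inputs, targets) example pairs
        self.batch_size = batch_size

    def __iter__(self):
        while True:  # never stop: Keras pulls batches indefinitely
            idx = np.random.choice(len(self.data), self.batch_size)
            batch = [self.data[i] for i in idx]
            inputs = np.stack([x for x, _ in batch])
            targets = np.stack([y for _, y in batch])
            yield inputs, targets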
def train(epochs, g, d, print_every=100):
    d_train, g_train = define_training_functions(g, d)
    for epoch in range(epochs):
        print("Epoch #", epoch + 1)
        epoch_start = time.time()
        data_gen = data_generator(batch_size, 'data', image_size)
        for i, img_batch in enumerate(data_gen):
            d_loss, = d_train([img_batch, 1])
            g_loss = g_train([1])
            if i % print_every == 0:
                print("Batch %d d_loss: %f g_loss: %f" % (i, d_loss, g_loss[0]))
        predict_images(g, 3, "bw_epoch_%d.png" % epoch)
        # elapsed time for this epoch, in minutes
        epoch_elapsed = (time.time() - epoch_start) / 60
        print("End of Epoch {}. Time Elapsed: {:.2f} min".format(epoch + 1, epoch_elapsed))
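# A hypothetical sketch of the directory-based data_generator the GAN loop above
# assumes: it walks an image folder once per epoch and yields normalized batches.
# The folder layout, RGB conversion, and [-1, 1] scaling are assumptions.
import os
import numpy as np
from PIL import Image

def toy_image_generator(batch_size, data_dir, image_size):
    files = sorted(os.listdir(data_dir))
    for start in range(0, len(files) - batch_size + 1, batch_size):
        batch = []
        for name in files[start:start + batch_size]:
            img = Image.open(os.path.join(data_dir, name)).convert('RGB')
            img = img.resize((image_size, image_size))
            batch.append(np.asarray(img, dtype=np.float32) / 127.5 - 1.0)
        yield np.stack(batch)  # shape: (batch_size, image_size, image_size, 3)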
config = {
    'data_path': './corpus.txt',
    'batch_size': 64,
    'epoch': 1,
    'embedding_size': 100,
    'd_model': 512,
    'n_head': 8,
    'd_ff': 1024,
    'part_num': 4,
    'PAD_IDX': 0,  # padding index for encoder/decoder inputs
    'layers': 6,
    'dropout': 0.1
}

DataGenerator = data_generator(config['data_path'], config['batch_size'])
config['vocab_size'] = len(DataGenerator.char2id)
config['max_len'] = DataGenerator.max_len + 4

###########
# build model
###########
model = Transformer(config)
model = model.cuda()
optimizer = torch.optim.Adam(model.get_trainable_parameters())
crit = torch.nn.CrossEntropyLoss(reduction='sum')

###########
# train model
###########
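# A minimal sketch of the loop the "train model" header above leads into, assuming
# DataGenerator yields (src, tgt) LongTensor batches and that the Transformer's
# forward pass returns logits of shape (batch, seq_len, vocab_size). The batch
# layout and the teacher-forcing shift are assumptions, not the snippet's actual code.
for epoch in range(config['epoch']):
    for src, tgt in DataGenerator:              # assumed batch interface
        src, tgt = src.cuda(), tgt.cuda()
        optimizer.zero_grad()
        logits = model(src, tgt[:, :-1])        # teacher forcing on the shifted target
        loss = crit(logits.reshape(-1, config['vocab_size']),
                    tgt[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()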
def main():
    construct_vocab = False
    encode_images = False
    train = True

    # Read and process raw data
    data = CaptioningData()
    # Find image files as data
    data.set_all_images(cfg.images_path)
    captions_dict = data.get_captions(cfg.token_file)
    caption_maxlen = data.get_caption_maxlen()

    # Construct vocabulary
    if construct_vocab:
        # get all captions to construct the vocab
        all_captions = data.get_all_captions()
        vocab = build_vocab(vocab_path=cfg.data_path,
                            vocab_name=cfg.vocab_name,
                            captions=all_captions,
                            threshold=2)
    else:
        vocab = load_vocab(vocab_path=cfg.data_path, vocab_name=cfg.vocab_name)
    # print(vocab.word2idx)

    inception_encoding = Encoder()

    # train data
    if train:
        train_images = data.get_train_images(cfg.train_image_files)
        train_pairs = [ImgCaptionPair(img_id, captions_dict[img_id])
                       for img_id in train_images]

        # Image encoding
        if encode_images:
            train_img_encoding = inception_encoding.encode_images(
                file_path=cfg.images_path,
                image_list=train_images,
                encoding_file=cfg.train_img_encoding_file)
        else:
            train_img_encoding = inception_encoding.load_image_encoding(
                encoding_file=cfg.train_img_encoding_file)

        train_data_generator = data_generator(vocab,
                                              train_pairs,
                                              train_img_encoding,
                                              batch_size=1800,
                                              max_len=caption_maxlen)
        # next(g)

    # Decoder model
    decoder = Decoder(vocab_size=len(vocab),
                      embedding_size=300,
                      input_shape=2048,
                      caption_max_len=caption_maxlen)
    decoder_model = decoder.get_model()
    decoder_model.load_weights('best_weights.97-0.95.hdf5')

    if train:
        decoder_model.compile(loss='categorical_crossentropy',
                              optimizer=RMSprop(),
                              metrics=['accuracy'])
        ckpt = ModelCheckpoint('weights.{epoch:02d}-{loss:.2f}.hdf5',
                               monitor='loss',
                               verbose=0,
                               save_best_only=False,
                               save_weights_only=False,
                               mode='auto',
                               period=30)
        best_ckpt = ModelCheckpoint('best_weights.{epoch:02d}-{loss:.2f}.hdf5',
                                    monitor='loss',
                                    verbose=0,
                                    save_best_only=True,
                                    save_weights_only=False,
                                    mode='auto',
                                    period=1)
        decoder_model.fit_generator(train_data_generator,
                                    steps_per_epoch=30,
                                    epochs=100,
                                    callbacks=[ckpt, best_ckpt])
        decoder_model.save('decoder_model.h5')

    # Greedy decoding on one validation image
    img_ids = data.get_val_images(cfg.val_image_files)
    img_name = img_ids[9]
    enc_img = inception_encoding.encode_single_img(file_path=cfg.images_path,
                                                   img_name=img_name)

    caption = ["<start>"]
    while True:
        par_caps = [vocab(i) for i in caption]
        par_caps = sequence.pad_sequences([par_caps], maxlen=40, padding='post')
        preds = decoder_model.predict([np.array([enc_img]), np.array(par_caps)])
        word_pred = vocab.idx2word[np.argmax(preds[0])]
        caption.append(word_pred)
        if word_pred == "<end>" or len(caption) > 40:
            break

    full_img_path = os.path.join(cfg.images_path, img_name)
    print(captions_dict[img_name])
    print(full_img_path)
    print(' '.join(caption[1:-1]))
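# A hypothetical sketch of the captioning data_generator used for training above:
# for every caption it emits ([image_encoding, padded_partial_caption], next_word)
# pairs, matching the two-input predict() call used during greedy decoding.
# The ImgCaptionPair attributes (captions, img_id) and the encoding lookup are assumptions.
import numpy as np
from keras.preprocessing import sequence
from keras.utils import to_categorical

def toy_caption_generator(vocab, pairs, img_encoding, batch_size, max_len):
    X_img, X_seq, y = [], [], []
    while True:
        for pair in pairs:
            for caption in pair.captions:                    # assumed attribute
                ids = [vocab(w) for w in caption.split()]
                for t in range(1, len(ids)):
                    X_img.append(img_encoding[pair.img_id])  # assumed lookup by image id
                    X_seq.append(ids[:t])
                    y.append(to_categorical(ids[t], num_classes=len(vocab)))
                    if len(y) == batch_size:
                        padded = sequence.pad_sequences(X_seq, maxlen=max_len, padding='post')
                        yield [np.array(X_img), padded], np.array(y)
                        X_img, X_seq, y = [], [], []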
def main_train():
    # parse the command-line arguments
    parser = argparse.ArgumentParser(description='Train the NER tagger')
    parser.add_argument('--vocab_size',
                        type=int,
                        required=False,
                        help='the input vocab size; if not given, the extracted vocab size is used')
    parser.add_argument('--d_model',
                        type=int,
                        required=False,
                        help='LSTM embedding and hidden layer dimension')
    parser.add_argument('--batch_size',
                        type=int,
                        required=True,
                        help='model batch size')
    parser.add_argument('--train_steps',
                        type=int,
                        required=False,
                        help='total training steps; if not given, training runs for a single step')
    parser.add_argument('--output_dir',
                        metavar='path',
                        type=str,
                        required=True,
                        help='output path where the model is written')
    args = parser.parse_args()

    # make the output dir
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # data import
    vocab, tag_map = get_vocab('data/large/words.txt', 'data/large/tags.txt')
    t_sentences, t_labels, t_size = get_params(vocab, tag_map,
                                               'data/large/train/sentences.txt',
                                               'data/large/train/labels.txt')
    v_sentences, v_labels, v_size = get_params(vocab, tag_map,
                                               'data/large/val/sentences.txt',
                                               'data/large/val/labels.txt')
    test_sentences, test_labels, test_size = get_params(vocab, tag_map,
                                                        'data/large/test/sentences.txt',
                                                        'data/large/test/labels.txt')

    # get the vocab size, including the <PAD> token
    vocab_size = len(vocab)
    if args.vocab_size:
        vocab_size = args.vocab_size

    # initialize the model
    model = NER(vocab_size=vocab_size, d_model=args.d_model, tags=tag_map)
    batch_size = args.batch_size

    # Create training data, masking <PAD> tokens for training.
    train_generator = trax.supervised.inputs.add_loss_weights(
        data_generator(batch_size, t_sentences, t_labels, vocab['<PAD>'], True),
        id_to_mask=vocab['<PAD>'])

    # Create validation data, masking <PAD> tokens as well.
    eval_generator = trax.supervised.inputs.add_loss_weights(
        data_generator(batch_size, v_sentences, v_labels, vocab['<PAD>'], True),
        id_to_mask=vocab['<PAD>'])

    def train_model(model, train_generator, eval_generator, train_steps=1,
                    output_dir=args.output_dir):
        '''
        Input:
            model - the model we are building
            train_generator - the data generator for training examples
            eval_generator - the data generator for validation examples
            train_steps - number of training steps
            output_dir - folder to save the model
        Output:
            training_loop - a trax supervised training Loop
        '''
        train_task = training.TrainTask(
            train_generator,                        # a train data generator
            loss_layer=tl.CrossEntropyLoss(),       # a cross-entropy loss function
            optimizer=trax.optimizers.Adam(0.01),   # the Adam optimizer
        )
        eval_task = training.EvalTask(
            labeled_data=eval_generator,                     # a labeled data generator
            metrics=[tl.CrossEntropyLoss(), tl.Accuracy()],  # evaluate with cross-entropy loss and accuracy
            n_eval_batches=10                                # number of batches to use on each evaluation
        )
        training_loop = training.Loop(
            model,                  # the model to train
            train_task,             # the training task
            eval_task=eval_task,    # the evaluation task
            output_dir=output_dir)  # the output directory

        # Train for train_steps
        training_loop.run(n_steps=train_steps)
        return training_loop

    # training loop
    train_steps = args.train_steps
    # Train the model
    # training_loop = train_model(model, train_generator, eval_generator, train_steps)
    train_model(model, train_generator, eval_generator, train_steps)
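# A minimal sketch of what the NER model factory used above might build, assuming
# the common embedding -> LSTM -> dense -> log-softmax tagger; the actual
# architecture is defined elsewhere and is not shown in this snippet.
from trax import layers as tl

def toy_NER(vocab_size, d_model, tags):
    return tl.Serial(
        tl.Embedding(vocab_size=vocab_size, d_feature=d_model),  # token embeddings
        tl.LSTM(n_units=d_model),                                # sequence encoder
        tl.Dense(n_units=len(tags)),                             # per-token tag scores
        tl.LogSoftmax()                                          # log-probabilities over tags
    )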
save_weights_path = 'saved_weights/' + dataset + '/best_model.weights'
LR = 1e-5
tokenizer = get_tokenizer(bert_vocab_path)
train_data, dev_data, test_data, id2rel, rel2id, num_rels = load_data(
    train_path, dev_path, test_path, rel_dict_path)
subject_model, object_model, hbt_model = E2EModel(bert_config_path,
                                                  bert_checkpoint_path,
                                                  LR, num_rels)

if args.train:
    BATCH_SIZE = 6
    EPOCH = 100
    MAX_LEN = 100
    STEPS = len(train_data) // BATCH_SIZE
    data_manager = data_generator(train_data, tokenizer, rel2id, num_rels,
                                  MAX_LEN, BATCH_SIZE)
    evaluator = Evaluate(subject_model, object_model, tokenizer, id2rel,
                         dev_data, save_weights_path)
    hbt_model.fit_generator(data_manager.__iter__(),
                            steps_per_epoch=STEPS,
                            epochs=EPOCH,
                            callbacks=[evaluator])
else:
    hbt_model.load_weights(save_weights_path)
    test_result_path = 'results/' + dataset + '/test_result.json'
    isExactMatch = dataset == 'Wiki-KBP'
    if isExactMatch:
        print("Exact Match")
    else:
        print("Partial Match")
    precision, recall, f1_score = metric(subject_model, object_model,