Example #1
from pickle import dump

from keras.callbacks import ModelCheckpoint

# `ld` (dataset loading) and `gen` (tokenizer/model/generator helpers) are
# project-local modules; their imports were omitted in the original.


def train_model(weight=None, epochs=10):
    # load dataset
    data = ld.prepare_dataset('train')
    train_features, train_descriptions = data[0]
    test_features, test_descriptions = data[1]

    # prepare tokenizer
    tokenizer = gen.create_tokenizer(train_descriptions)
    # save the tokenizer (use a context manager so the file is closed)
    with open('models/tokenizer.pkl', 'wb') as f:
        dump(tokenizer, f)
    # index_word dict for mapping predictions back to words
    index_word = {v: k for k, v in tokenizer.word_index.items()}
    # save dict
    with open('models/index_word.pkl', 'wb') as f:
        dump(index_word, f)

    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)

    # determine the maximum sequence length
    max_length = gen.max_length(train_descriptions)
    print('Description Length: %d' % max_length)

    # generate model
    model = gen.define_model(vocab_size, max_length)

    # optionally resume from pre-trained weights
    if weight is not None:
        model.load_weights(weight)

    # define checkpoint callback
    filepath = 'models/model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')

    # one batch per image, so steps per epoch equals the number of images
    steps = len(train_descriptions)
    val_steps = len(test_descriptions)
    # create the data generator
    train_generator = gen.data_generator(train_descriptions, train_features,
                                         tokenizer, max_length)
    val_generator = gen.data_generator(test_descriptions, test_features,
                                       tokenizer, max_length)

    # fit model (fit_generator is the legacy API; in TF 2.x, model.fit
    # accepts generators directly)
    model.fit_generator(train_generator,
                        epochs=epochs,
                        steps_per_epoch=steps,
                        verbose=1,
                        callbacks=[checkpoint],
                        validation_data=val_generator,
                        validation_steps=val_steps)

    try:
        model.save('models/wholeModel.h5', overwrite=True)
        model.save_weights('models/weights.h5', overwrite=True)
    except Exception as e:
        print("Error in saving model: %s" % e)
    print("Training complete...\n")
Example #2
# (imports as in Example #1)
def model_training(weight=None, epochs=10):
  # load dataset
  train_d = ld.prepare_dataset('train')
  train_features, train_descriptions = train_d[0]
  test_features, test_descriptions = train_d[1]

  
  # prepare tokenizer
  split_sentences = gen.create_tokenizer(train_descriptions)
  # save the tokenizer
  with open('models/tokenizer.pkl', 'wb') as f:
    dump(split_sentences, f)
  # index_word dict for mapping predictions back to words
  index_word = {value: key for key, value in split_sentences.word_index.items()}
  with open('models/index_word.pkl', 'wb') as f:
    dump(index_word, f)

  vocab_size = len(split_sentences.word_index) + 1
  print('Size of the Vocabulary: %d' % vocab_size)

  
  # determine the maximum sequence length
  max_length = gen.max_length(train_descriptions)
  print('Length of the Descriptions: %d' % max_length)

  
  # generate model
  model = gen.define_model(vocab_size, max_length)

  
  # optionally resume from pre-trained weights
  if weight is not None:
    model.load_weights(weight)

  
  # checkpoint callback: keep only the best model by validation loss
  filepath = 'models/model.h5'
  checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                               save_best_only=True, mode='min')

  steps = len(train_descriptions)
  val_steps = len(test_descriptions)
  
  # create the data generators
  tg = gen.data_generator(train_descriptions, train_features, split_sentences, max_length)
  vg = gen.data_generator(test_descriptions, test_features, split_sentences, max_length)

  
  # fit model
  model.fit_generator(tg, epochs=epochs, steps_per_epoch=steps, verbose=1,
                      callbacks=[checkpoint], validation_data=vg, validation_steps=val_steps)

  try:
      model.save('models/wholeModel.h5', overwrite=True)
      model.save_weights('models/weights.h5', overwrite=True)
  except Exception as e:
      print("Error in saving model: %s" % e)
  print("Training complete.\n")
Example #3
# (imports as in Example #1)
def train_model(weight=None, epochs=10):
    data = ld.prepare_dataset('train')
    train_features, train_descriptions = data[0]
    test_features, test_descriptions = data[1]

    tokenizer = gen.create_tokenizer(train_descriptions)
    with open('../models/tokenizer.pkl', 'wb') as f:
        dump(tokenizer, f)
    index_word = {v: k for k, v in tokenizer.word_index.items()}
    with open('../models/index_word.pkl', 'wb') as f:
        dump(index_word, f)

    vocab_size = len(tokenizer.word_index) + 1

    max_length = gen.max_length(train_descriptions)

    model = gen.define_model(vocab_size, max_length)

    if weight is not None:
        model.load_weights(weight)

    filepath = '../models/model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')

    steps = len(train_descriptions)
    val_steps = len(test_descriptions)
    train_generator = gen.data_generator(train_descriptions, train_features,
                                         tokenizer, max_length)
    val_generator = gen.data_generator(test_descriptions, test_features,
                                       tokenizer, max_length)

    model.fit_generator(train_generator,
                        epochs=epochs,
                        steps_per_epoch=steps,
                        verbose=1,
                        callbacks=[checkpoint],
                        validation_data=val_generator,
                        validation_steps=val_steps)

    try:
        model.save('../models/wholeModel.h5', overwrite=True)
        model.save_weights('../models/weights.h5', overwrite=True)
    except Exception as e:
        print("Error in saving model: %s" % e)
    print("Training complete...\n")