from pickle import load

import util


def train():
    # load training dataset (6K image ids)
    filename = 'Flickr_8k.trainImages.txt'
    train = util.load_ids(filename)
    print('Dataset: %d' % len(train))
    train_captions = util.load_clean_captions('descriptions.txt', train)
    print('Captions: train number=%d' % len(train_captions))
    # photo features
    train_features = util.load_photo_features('features.pkl', train)
    print('Photos: train=%d' % len(train_features))
    # prepare tokenizer
    tokenizer = load(open('tokenizer.pkl', 'rb'))
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)
    # determine the maximum sequence length
    max_len = util.get_max_length(train_captions)
    print('Description Length: %d' % max_len)
    # define the model
    model = caption_model(vocab_size, max_len)
    # train the model: run epochs manually and save after each epoch
    epochs = 20
    steps = len(train_captions)
    for i in range(epochs):
        # create the data generator
        generator = data_generator(train_captions, train_features, tokenizer, max_len)
        # fit for one epoch
        model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
        # save model
        model.save('model_' + str(i) + '.h5')
import os
from pickle import load

import util


def train():
    # load training dataset (6K image ids)
    filename = 'Flickr_8k.trainImages.txt'
    train = util.load_ids(filename)
    print('Dataset: %d' % len(train))
    train_captions = util.load_clean_captions('descriptions.txt', train)
    print('Captions: train number=%d' % len(train_captions))
    # photo features
    train_features = util.load_photo_features('features.pkl', train)
    print('Photos: train=%d' % len(train_features))
    # prepare tokenizer
    tokenizer = load(open('tokenizer.pkl', 'rb'))
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)
    # determine the maximum sequence length
    max_len = util.get_max_length(train_captions)
    print('Description Length: %d' % max_len)
    # define the model
    model = caption_model(vocab_size, max_len)
    # train the model: run epochs manually and save after each epoch
    epochs = 5
    steps = len(train_captions)
    for i in range(epochs):
        # create the data generator
        generator = data_generator(train_captions, train_features, tokenizer, max_len)
        # The generator yields pairs (X, y). X holds the model's two inputs:
        # the photo feature vector and the prefix of caption words seen so
        # far; y is the next word to predict, i.e. the response variable.
        # That is why the generator produces three arrays (feature, prefix,
        # next word) while the model has only two inputs: the first two form
        # X and feed the model, and the third is y.
        # The generator loops forever by design; the epochs and
        # steps_per_epoch hyperparameters bound how many times it is drawn
        # from, namely epochs * steps_per_epoch batches in total.
        model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
        # save model
        model.save('model' + os.sep + 'model_my' + str(i) + '.h5')
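# For reference, below is a minimal sketch of what data_generator could look
# like — an assumption based on the standard Keras image-captioning recipe,
# not this project's actual implementation. It illustrates the
# ([photo_feature, word_prefix], next_word) contract the comments above
# describe; captions is assumed to map image id -> list of caption strings.
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical


def data_generator(captions, features, tokenizer, max_len):
    """Hypothetical sketch: yields ([photo_features, padded_prefixes], next_words)."""
    vocab_size = len(tokenizer.word_index) + 1
    while True:  # loop forever; epochs * steps_per_epoch bounds consumption
        for image_id, caption_list in captions.items():
            photo = features[image_id][0]
            for caption in caption_list:
                seq = tokenizer.texts_to_sequences([caption])[0]
                X1, X2, y = [], [], []
                # split one caption into (prefix -> next word) training pairs
                for t in range(1, len(seq)):
                    in_seq = pad_sequences([seq[:t]], maxlen=max_len)[0]
                    out_seq = to_categorical([seq[t]], num_classes=vocab_size)[0]
                    X1.append(photo)
                    X2.append(in_seq)
                    y.append(out_seq)
                yield [np.array(X1), np.array(X2)], np.array(y)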
import logging

import numpy as np

import util


def prepare_data(pronunciations, letter_ids):
    logging.info('preparing the data...')
    max_length = util.get_max_length(pronunciations)
    # X: one-hot encoding of each word's letters, padded to max_length
    X = np.zeros((len(pronunciations), max_length, len(letter_ids)), dtype=bool)
    # y: the number of phonemes in each word's pronunciation
    y = np.zeros(len(pronunciations), dtype=int)
    for n, word in enumerate(pronunciations):
        for i, letter in enumerate(word):
            letter_id = letter_ids[letter]
            X[n, i, letter_id] = True
        y[n] = len(pronunciations[word])
    return X, y
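# A toy invocation of prepare_data with made-up data, showing the shapes it
# produces. The sample words and phoneme lists are hypothetical; the real
# pronunciations dict presumably comes from a dictionary loader elsewhere.
pronunciations = {'cat': ['K', 'AE', 'T'], 'ox': ['AA', 'K', 'S']}
letter_ids = {letter: i for i, letter in enumerate('acotx')}

X, y = prepare_data(pronunciations, letter_ids)
print(X.shape)  # (2, 3, 5), assuming util.get_max_length returns the longest word length
print(y)        # [3 3] — phoneme counts per word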
def __init__(self, model: Model, pronunciations, letter_ids):
    self.model = model
    self.pronunciations = pronunciations
    self.letter_ids = letter_ids
    self.max_len = util.get_max_length(pronunciations.values())
# Example entries from train_captions:
# 'startseq Two women laying on grass with a dog . endseq',
# 'startseq Two women on a grassy hill lit by the sun ; one looks at a dachshund , the other looks to the side . endseq'

# photo features
feature_path = r"E:/AI资源计算机视觉/JM07 - TXXY - CV2期/02.资料/homework-master-7fc833414b95225130c323c278230bc388af5c6b/homework1/features.pkl"
train_features = util.load_photo_features(feature_path, train)
# print('Photos: train=%d' % len(train_features))
# print(len(train_features["3585117340_73e96b6173"][0]))  # 4096

# prepare tokenizer
tokenizer = load(open('tokenizer.pkl', 'rb'))
vocab_size = len(tokenizer.word_index) + 1  # vocabulary size
print('Vocabulary Size: %d' % vocab_size)  # 7378

# determine the maximum sequence length
max_len = util.get_max_length(train_captions)
print('Description Length: %d' % max_len)  # 40

# define the model
model = caption_model(vocab_size, max_len)
print(model)

# train the model, run epochs manually and save after each epoch
epochs = 20
steps = len(train_captions)  # 6000
for i in range(epochs):
    # create the data generator
    generator = data_generator(train_captions, train_features, tokenizer, max_len)
    # print(generator)
    # fit for one epoch
    model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
from glob import glob
from os.path import join

from torch.utils.data import DataLoader  # assuming PyTorch's DataLoader

# ---------- Data Load ----------
x_paths = {}
t_paths = {}
dataset = {}
loader = {}
for type_ in types:
    print('Loading %s dataset ... ' % (type_), end='')
    # input (linguistic) and target (acoustic parameter) feature files
    x_paths[type_] = glob(join(data_dir, 'x_' + type_, '*.bin'))
    t_paths[type_] = glob(join(data_dir, 't_' + type_, '*.bin'))
    x_dim = config.get_feature_config().get_linguistic_dim(type_)
    t_dim = config.get_feature_config().get_parm_dim(type_)
    max_len = get_max_length(x_paths[type_], x_dim)
    pad_value = config.get_feature_config().pad_value
    dataset[type_] = SpeechDataset(x_paths[type_], t_paths[type_],
                                   x_dim=x_dim, t_dim=t_dim,
                                   max_len=max_len, pad_value=pad_value)
    batch_size = config.get_train_config().batch_size
    loader[type_] = DataLoader(dataset[type_], batch_size=batch_size, shuffle=True)
    print('done!')
    print('\tDataset Size\t%d' % len(x_paths[type_]))
    print('\tInput Dim\t%d' % x_dim)
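# Below is a hypothetical sketch of the SpeechDataset class used above — an
# assumption, not the project's actual implementation. It shows one way the
# x_dim / t_dim / max_len / pad_value arguments could be used: each .bin file
# is read as flat float32 data, reshaped to (frames, dim), and padded to
# max_len frames so every item in a batch has the same shape.
import numpy as np
import torch
from torch.utils.data import Dataset


class SpeechDataset(Dataset):
    def __init__(self, x_paths, t_paths, x_dim, t_dim, max_len, pad_value):
        self.x_paths, self.t_paths = x_paths, t_paths
        self.x_dim, self.t_dim = x_dim, t_dim
        self.max_len, self.pad_value = max_len, pad_value

    def __len__(self):
        return len(self.x_paths)

    def _load_padded(self, path, dim):
        # read a flat binary file and reshape into (frames, dim)
        data = np.fromfile(path, dtype=np.float32).reshape(-1, dim)
        # pad up to max_len frames with pad_value
        padded = np.full((self.max_len, dim), self.pad_value, dtype=np.float32)
        padded[:len(data)] = data
        return torch.from_numpy(padded)

    def __getitem__(self, idx):
        x = self._load_padded(self.x_paths[idx], self.x_dim)
        t = self._load_padded(self.t_paths[idx], self.t_dim)
        return x, t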