def split_data():
    print("Data splitting... ", end="")
    data = pd.read_table(helper.root_dir() + '/data/data.txt', sep="*")
    train_data, test_data = train_test_split(data, test_size=0.1, shuffle=True)
    train_data.to_csv(helper.root_dir() + '/data/train.txt', sep="*", index=None)
    test_data.to_csv(helper.root_dir() + '/data/test.txt', sep="*", index=None)
    print("Done. Data saved to data folder. Train: {} rows. Test: {} rows."
          .format(train_data.shape[0], test_data.shape[0]))

def run(epochs=50, batch_size=256):
    data_path = helper.root_dir() + '/data/'
    generator = Generator(batch_size=batch_size)
    num_training_samples = generator.training_dataset.shape[0]
    num_validation_samples = generator.validation_dataset.shape[0]
    print('Number of training samples:', num_training_samples)
    print('Number of validation samples:', num_validation_samples)

    # Build and compile the captioning network.
    nn = model.generate(max_token_length=generator.MAX_TOKEN_LENGTH,
                        vocabulary_size=generator.VOCABULARY_SIZE)
    nn.compile(loss='categorical_crossentropy', optimizer='adam')

    # Use a single timestamp so the checkpoint and TensorBoard folders match.
    timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M')
    models_path = data_path + 'weights/' + timestamp
    os.makedirs(models_path)
    model_names = models_path + '/nn_weights.{epoch:02d}-{val_loss:.2f}.hdf5'
    model_checkpoint = ModelCheckpoint(model_names, monitor='loss', verbose=1,
                                       save_best_only=True, mode='min')
    tboard = TensorBoard(log_dir=data_path + 'tboard/' + timestamp)
    callbacks = [model_checkpoint, tboard]

    nn.fit_generator(generator=generator.flow(train=True),
                     epochs=epochs,
                     steps_per_epoch=int(num_training_samples / batch_size),
                     validation_data=generator.flow(train=False),
                     validation_steps=int(num_validation_samples / batch_size),
                     callbacks=callbacks,
                     verbose=1)

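# A minimal sketch of how the two steps above might be wired together; the
# __main__ guard and the argument values are illustrative assumptions, not
# part of the original project.
if __name__ == '__main__':
    split_data()                     # writes data/train.txt and data/test.txt
    run(epochs=50, batch_size=256)   # trains the captioning model
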
def __init__(self):
    self.image_model = InceptionV3(weights='imagenet')
    self.generator = Generator(batch_size=32)
    self.nn = model.generate(max_token_length=self.generator.MAX_TOKEN_LENGTH,
                             vocabulary_size=self.generator.VOCABULARY_SIZE)
    data_path = helper.root_dir() + '/data/'
    models_path = data_path + 'weights/2018-03-25-14-18'
    model_names = models_path + '/nn_weights.25-2.94.hdf5'
    self.nn.load_weights(model_names)

def __init__(self, run_inception=False, word_threshold=-1):
    self.root_path = helper.root_dir()
    self.word_threshold = word_threshold
    self.max_caption_length = 0
    self.run_inception = run_inception
    self.IMG_FEATURES = 1000  # inception model
    self.BOS = '<S>'  # Beginning Of Sentence
    self.EOS = '<E>'  # End Of Sentence
    self.PAD = '<P>'
    self.word_frequencies = None
    self.captions = None
    self.image_files = None
    self.image_features = None
    self.word_to_id = None
    self.id_to_word = None
    self.extracted_features = None
    self.features_file_names = None
    self.image_feature_files = None
    self.vocabulary_size = 0

def __init__(self, batch_size=1):
    self.data_path = helper.root_dir() + '/data/'
    self.dictionary = None
    self.training_dataset = None
    self.validation_dataset = None
    self.image_names_to_features = None

    # Read the key/value pairs saved during preprocessing.
    data_logs = np.genfromtxt(self.data_path + 'data_parameters.log',
                              delimiter=' ', dtype='str')
    data_logs = dict(zip(data_logs[:, 0], data_logs[:, 1]))

    self.MAX_TOKEN_LENGTH = int(data_logs['max_caption_length:']) + 2  # +2 for the BOS and EOS tokens
    self.IMG_FEATS = int(data_logs['IMG_FEATS:'])
    self.BOS = str(data_logs['BOS:'])
    self.EOS = str(data_logs['EOS:'])
    self.PAD = str(data_logs['PAD:'])
    self.VOCABULARY_SIZE = None
    self.word_to_id = None
    self.id_to_word = None
    self.BATCH_SIZE = batch_size

    self._load_dataset()
    self._load_vocabulary()
    self._load_image_features()
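
# For reference, the constructor above expects data/data_parameters.log to
# hold one space-separated "key value" pair per line, with the trailing colon
# kept as part of the key. A hypothetical example (the max_caption_length
# value is illustrative only):
#
#   max_caption_length: 37
#   IMG_FEATS: 1000
#   BOS: <S>
#   EOS: <E>
#   PAD: <P>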