def fit(self,
        x,
        y,
        epochs=1,
        batch_size=DEFAULT_BATCH_SIZE,
        shuffle=True,
        validation_data=None,
        save_best=None,
        optimizer=None,
        optimizer_encoder=None,
        pointer=False):
    data = self.check_data(x, y, batch_size)
    if not self.model:
        logger.info('build model')
        word_index, index_word = get_tags(y)
        logger.info(f'tags count: {len(word_index)}')
        if not pointer:
            self.model = TaggerModel(
                encoder_path=self.encoder_path,
                hidden_size=self.hidden_size,
                dropout=self.dropout,
                n_layers=self.n_layers,
                n_additional_features=self.n_additional_features,
                rnn=self.rnn,
                word_index=word_index,
                index_word=index_word,
                encoder_trainable=self.encoder_trainable,
                bidirection=self.bidirection)
        else:
            self.model = TaggerPointerModel(
                encoder_path=self.encoder_path,
                word_index=word_index,
                index_word=index_word,
                encoder_trainable=self.encoder_trainable)
    if optimizer is None:
        optimizer = tf.optimizers.Adam(
            learning_rate=1e-3, clipnorm=1.0)
    if optimizer_encoder is None:
        # The pretrained encoder gets a much smaller learning rate
        optimizer_encoder = tf.optimizers.Adam(
            learning_rate=1e-5, clipnorm=1.0)
    self.model.optimizer_encoder = optimizer_encoder
    self.model.compile(optimizer=optimizer)
    logger.info('check model predict')
    # Warm-up predict on two batches so the model's variables are built
    # before training starts
    pred_data = self.check_data(x, y=None, batch_size=batch_size)
    for xx in pred_data.take(2):
        self.model.predict_on_batch(xx)
    logger.info('start training')
    self.model.fit(
        data,
        epochs=epochs,
        callbacks=[
            CheckValidation(
                tagger=self,
                batch_size=batch_size,
                validation_data=validation_data,
                save_best=save_best)
        ])
    logger.info('training done')
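# Usage sketch (illustrative, not from the original source): assumes this
# fit method lives on a tagger wrapper class, here called "Tagger" as a
# hypothetical name, whose constructor supplies encoder_path, hidden_size,
# and the other attributes read above. The BIO-style sample data is invented.
#
#   tagger = Tagger(encoder_path='./encoder')
#   x = [['John', 'lives', 'in', 'Paris']]
#   y = [['B-PER', 'O', 'O', 'B-LOC']]
#   tagger.fit(x, y, epochs=3, batch_size=32,
#              validation_data=(x, y), save_best='best_tagger')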
def fit(self,
        x,
        y,
        epochs=1,
        batch_size=DEFAULT_BATCH_SIZE,
        shuffle=True,
        validation_data=None,
        save_best=None,
        optimizer=None,
        optimizer_encoder=None):
    data = self.check_data(x, y, batch_size)
    if not self.model:
        logger.info('build model')
        word_index, index_word = get_tags(y)
        logger.info(f'targets count: {len(word_index)}')
        self.model = ClassificationModel(
            encoder_path=self.encoder_path,
            word_index=word_index,
            index_word=index_word,
            encoder_trainable=self.encoder_trainable)
    if optimizer is None:
        optimizer = tf.keras.optimizers.Adam(1e-4)
    if optimizer_encoder is None:
        # The pretrained encoder gets a smaller learning rate
        optimizer_encoder = tf.keras.optimizers.Adam(1e-5)
    self.model.optimizer_encoder = optimizer_encoder
    self.model.compile(optimizer=optimizer, metrics=['acc'])
    logger.info('check model predict')
    # Warm-up predict on a single wrapped example so the model's
    # variables are built before training starts
    self.model.predict(
        tf.constant([['[CLS]'] + xx[:MAX_LENGTH] + ['[SEP]']
                     for xx in x[:1]]),
        verbose=0)
    logger.info('start training')
    self.model.fit(data, epochs=epochs)
    logger.info('training done')
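# Usage sketch (illustrative, not from the original source): assumes a
# classifier wrapper class, here called "Classifier" as a hypothetical name.
# Note that fit itself wraps each example with '[CLS]'/'[SEP]' for the
# warm-up predict, so the raw token lists should not include them.
#
#   clf = Classifier(encoder_path='./encoder')
#   x = [['this', 'movie', 'is', 'great'],
#        ['what', 'a', 'waste', 'of', 'time']]
#   y = ['positive', 'negative']
#   clf.fit(x, y, epochs=3, batch_size=32)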
def fit(self,
        x,
        y0,
        y1,
        epochs=1,
        batch_size=DEFAULT_BATCH_SIZE,
        build_only=False,
        optimizer=None,
        optimizer_encoder=None):
    assert hasattr(x, '__len__'), 'X should be a list/np.array'
    assert len(x) > 0, 'len(X) should be more than 0'
    assert isinstance(x[0], (tuple, list)), \
        'Elements of X should be tuple or list'
    if not self.model:
        logger.info('parser.fit build model')
        word_index, index_word = get_tags(y0)
        pos_word_index, pos_index_word = pos_get_tags(y1)
        self.model = ParserModel(
            encoder_path=self.encoder_path,
            tag0_size=len(word_index),
            tag1_size=1,  # binary
            proj0_size=self.proj0_size,
            proj1_size=self.proj1_size,
            hidden_size=self.hidden_size,
            n_layers=self.n_layers,
            encoder_trainable=self.encoder_trainable,
            word_index=word_index,
            index_word=index_word,
            pos_word_index=pos_word_index,
            pos_index_word=pos_index_word,
            rnn=self.rnn,
            bidirection=self.bidirection)
    if build_only:
        return
    if optimizer is None:
        optimizer = tf.optimizers.Adam(learning_rate=1e-3, clipnorm=1.0)
    if optimizer_encoder is None:
        # The pretrained encoder gets a much smaller learning rate
        optimizer_encoder = tf.optimizers.Adam(learning_rate=1e-5,
                                               clipnorm=1.0)
    self.model.optimizer_encoder = optimizer_encoder
    self.model.compile(optimizer=optimizer)
    logger.info('parser.fit start training')

    def make_generator(data):
        def _gen():
            for item in data:
                yield np.asarray(item, dtype=str)
        return _gen

    x_dataset = tf.data.Dataset.from_generator(
        make_generator(x), tf.string, tf.TensorShape([None]))
    y0_dataset = tf.data.Dataset.from_generator(
        make_generator(y0), tf.string, tf.TensorShape([None, None]))
    y1_dataset = tf.data.Dataset.from_generator(
        make_generator(y1), tf.string, tf.TensorShape([None]))

    # Removed bucket_by_sequence_length because it is deprecated in TF 2.5:
    # bucket_size = 5
    # bucket_boundaries = list(range(
    #     MAX_LENGTH // bucket_size, MAX_LENGTH, MAX_LENGTH // bucket_size))
    # bucket_batch_sizes = [batch_size] * (len(bucket_boundaries) + 1)
    # x_dataset, y0_dataset, y1_dataset = [
    #     dataset.apply(
    #         tf.data.experimental.bucket_by_sequence_length(
    #             size_func,
    #             bucket_batch_sizes=bucket_batch_sizes,
    #             bucket_boundaries=bucket_boundaries))
    #     for dataset, size_func in zip(
    #         (x_dataset, y0_dataset, y1_dataset),
    #         (lambda x: tf.size(x),
    #          lambda x: tf.shape(x)[0],
    #          lambda x: tf.size(x)))
    # ]

    dataset = tf.data.Dataset.zip((x_dataset, (y0_dataset, y1_dataset)))
    dataset = dataset.shuffle(1024, reshuffle_each_iteration=True)
    # @TODO: if drop_remainder is False, it may cause a bug
    dataset = dataset.padded_batch(batch_size, drop_remainder=True)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    # Warm-up predict on two batches so the model's variables are built
    # before training starts
    for xb, _ in dataset.take(2):
        self.model.predict_on_batch(xb)
    self.model.fit(dataset, epochs=epochs)
    dataset = None  # drop the reference so the pipeline can be freed
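# Usage sketch (illustrative, not from the original source): assumes a
# parser wrapper class, here called "Parser" as a hypothetical name. x holds
# token sequences, y0 holds one 2-D string structure per sentence (matching
# the [None, None] TensorShape above), and y1 holds one tag sequence per
# sentence (matching the [None] TensorShape). The call below only shows the
# expected argument layout, not a real annotation scheme.
#
#   parser = Parser(encoder_path='./encoder')
#   parser.fit(x, y0, y1, epochs=3, batch_size=32)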