예제 #1
0
def in_memory_train_eval(estimator: tf.estimator.Estimator,
                         model: EstimatorConvModel):
    """Train `estimator` in-process, evaluating periodically via in-memory hooks.

    One evaluator hook always runs against the standard eval set; when
    excluded keys are configured, a second hook additionally evaluates
    against the full (excludes-included) set under the name 'full'.
    """
    provider = model.dataset_provider
    train_steps = config[consts.TRAIN_STEPS]
    interval = config[consts.EVAL_STEPS_INTERVAL]

    # Name the primary evaluation after the exclusion fragment, if any.
    excluded = config[consts.EXCLUDED_KEYS]
    eval_name = filenames.create_excluded_name_fragment() if excluded else None

    hooks = [
        tf.contrib.estimator.InMemoryEvaluatorHook(
            estimator=estimator,
            input_fn=lambda: provider.eval_input_fn(),
            every_n_iter=interval,
            name=eval_name),
    ]
    if excluded:
        # Extra evaluation over the full dataset (including excluded keys).
        hooks.append(
            tf.contrib.estimator.InMemoryEvaluatorHook(
                estimator=estimator,
                input_fn=lambda: provider.eval_with_excludes_input_fn(),
                every_n_iter=interval,
                name='full'))

    estimator.train(input_fn=lambda: provider.train_input_fn(),
                    steps=train_steps,
                    hooks=hooks)
예제 #2
0
def train_and_test(estimator: tf.estimator.Estimator, train_input_fn,
                   test_input_fn, steps, steps_between_evals, eval_steps):
    """Alternate training and evaluation, printing eval metrics each round.

    Runs a baseline evaluation first, then `steps // steps_between_evals`
    rounds of (train for `steps_between_evals` steps, evaluate for
    `eval_steps` steps).
    """
    # Baseline evaluation before any training has happened.
    eval_results = estimator.evaluate(input_fn=test_input_fn, steps=eval_steps)
    print(eval_results)

    n_rounds = steps // steps_between_evals
    for _ in range(n_rounds):
        estimator.train(input_fn=train_input_fn, steps=steps_between_evals)
        eval_results = estimator.evaluate(input_fn=test_input_fn,
                                          steps=eval_steps)
        print(eval_results)
def train(model: tf.estimator.Estimator,
          nb_epochs: int,
          train_data_path: str,
          val_data_path: str,
          batch_size: int = 32):
    """Train `model` for `nb_epochs` epochs, recording per-epoch losses.

    Evaluates on both the training and validation sets before training and
    after every epoch, so each returned history has `nb_epochs + 1` entries.

    :param model: estimator to train
    :param nb_epochs: number of single-epoch training passes
    :param train_data_path: path to the training TFRecords
    :param val_data_path: path to the validation TFRecords
    :param batch_size: training batch size
    :return: (train_epoch_history, validation_epoch_history) lists of
        the metric dicts produced by `model.evaluate`
    """
    def _evaluate(data_path):
        # Unshuffled single pass so the tracked metrics are deterministic.
        return model.evaluate(
            input_fn=lambda: load_dataset(data_path, shuffle=False))

    train_epoch_history = [_evaluate(train_data_path)]
    validation_epoch_history = [_evaluate(val_data_path)]

    for epoch in range(nb_epochs):
        # BUG FIX: previously trained on the hard-coded path
        # 'data/train.tfrecords' instead of the `train_data_path` argument,
        # so the evaluated training set and the trained-on set could differ.
        model.train(
            input_fn=lambda: load_dataset(train_data_path,
                                          epochs=1,
                                          shuffle=True,
                                          batch_size=batch_size))

        train_epoch_history.append(_evaluate(train_data_path))
        validation_epoch_history.append(_evaluate(val_data_path))

        logging.info(f"EPOCH: {epoch}:\n"
                     f"\tval_loss: {validation_epoch_history[-1]['loss']}\n"
                     f"\ttrain_loss: {train_epoch_history[-1]['loss']}\n")

    return train_epoch_history, validation_epoch_history
    def fit_model_on_fold(self, compiled_model: tf.estimator.Estimator, curr_fold_indices,
                          train_sequences, test_sequences):
        """
        trains compiled (but previously unfitted) model against given indices
        :param compiled_model: unfitted estimator to train on this fold
        :param curr_fold_indices: (train_indices, val_indices) index arrays for this fold
        :param train_sequences: encoded sequences for the full training set
        :param test_sequences: encoded sequences for the test set
        :return: (validation ROC-AUC score, test-set probability array)
        """
        def train_input_fn(features, labels, batch_size):
            # Repeat indefinitely; the `steps=` argument below bounds training.
            dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
            dataset = dataset.shuffle(10000).repeat().batch(batch_size)
            return dataset

        def eval_input_fn(features, labels, batch_size):
            """use for both validation and prediction (labels=None when predicting)"""
            features = dict(features)
            inputs = features if labels is None else (features, labels)
            return tf.data.Dataset.from_tensor_slices(inputs).batch(batch_size)

        train_indices, val_indices = curr_fold_indices
        x_train = {'sequence': train_sequences[train_indices]}
        y_train = self.raw_train_df[self.target_cols].iloc[train_indices].values

        x_val = {'sequence': train_sequences[val_indices]}
        y_val = self.raw_train_df[self.target_cols].iloc[val_indices].values

        # Step count approximates `self.epochs` passes over the fold's training data.
        compiled_model.train(input_fn=lambda: train_input_fn(x_train, y_train, self.batch_size),
                             steps=self.epochs * len(train_indices) // self.batch_size)

        # CONSISTENCY FIX: pass `input_fn` by keyword (the test-set call below
        # already did); previously this call passed it positionally.
        val_predictions = compiled_model.predict(
            input_fn=lambda: eval_input_fn(x_val, None, self.batch_size))
        val_prob = np.array([p['probabilities'] for p in val_predictions])
        val_roc_auc_score = roc_auc_score(y_val, val_prob)
        print('ROC-AUC val score: {0:.4f}'.format(val_roc_auc_score))

        x_test = {'sequence': test_sequences}
        test_predictions = compiled_model.predict(
            input_fn=lambda: eval_input_fn(x_test, None, self.batch_size))
        test_prob = np.array([p['probabilities'] for p in test_predictions])

        return val_roc_auc_score, test_prob