Example #1
def make_submission(batch_size: int, parallel_calls: int, sub_file: str):

    estimator = model_module.ProteinEstimator()
    model_config = common.TFHubModels(common.ConfigurationJson().TF_HUB_MODULE)

    dir_dataset = dataset_module.Dataset(common.PathsJson().DIR_TRAIN)
    img_ids_paths = dir_dataset.scan_dir()

    def g():
        # Yield (img_id, paths) pairs for Dataset.from_generator below.
        yield from img_ids_paths.items()

    def input_fn():
        def _map_fn(img_id, paths):

            img = dataset_module.tf_load_image(paths)
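            # Center-crop to the model's expected input size, keep the RGB
            # channels, and scale pixel values from [0, 255] to [0, 1].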
            img = tf.image.crop_to_bounding_box(
                img,
                tf.to_int32(
                    (tf.shape(img)[0] - model_config.expected_image_size[0]) /
                    2),
                tf.to_int32(
                    (tf.shape(img)[1] - model_config.expected_image_size[1]) /
                    2),
                model_config.expected_image_size[0],
                model_config.expected_image_size[1],
            )[:, :, 0:3] / 255

            return {
                dataset_module.TFRecordKeys.ID: img_id,
                dataset_module.TFRecordKeys.DECODED: img
            }

        # Stream (id, paths) pairs from g, then decode, crop, and batch
        # them with parallel map calls.
        dataset = tf.data.Dataset.from_generator(
            g, (tf.string, tf.string), ([], [None])) \
            .apply(tf.data.experimental.map_and_batch(
                _map_fn, batch_size, num_parallel_calls=parallel_calls)) \
            .prefetch(None)

        return dataset

    desired_pred = [estimator.IMAGE_ID, estimator.SCORES]

    submission = common.Submission(sub_file)
    logger = logging.getLogger("predictor")
    logger.info(f"Using batch of {batch_size}")
    logger.info(f"Using {paralell_calls} paralell calls")

    tf.logging.set_verbosity(tf.logging.INFO)
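    # Threshold each score vector at 0.5 and write one submission row per image.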
    i = 0
    for i, predictions in enumerate(
            estimator.predict(input_fn, desired_pred), start=1):

        img_id = predictions[estimator.IMAGE_ID]
        scores = predictions[estimator.SCORES]
        labels = dir_dataset.vector_label((scores > 0.5).astype(int))
        submission.add_submission(img_id.decode(), labels)

        if i % 100 == 0:
            logger.info(f"Wrote {i} examples.")

    submission.end_sumbission()
    logger.info(f"Finished, wrote {i} examples.")
Example #2
    tflogger.setLevel(logging.INFO)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)


if __name__ == "__main__":

    import argparse
    import os

    logging.basicConfig(level=logging.INFO)

    logger = logging.getLogger("train_script")
    logging.getLogger("tensorflow").setLevel(tf.logging.ERROR)

    config = common.ConfigurationJson()
    paths = common.PathsJson()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    parser = argparse.ArgumentParser(
        description="Train the model on parsed data.")

    parser.add_argument("--override", action="store_true", help="""
    Overwrite an existing saved_model directory.
    """)
    parser.add_argument("--warm_start", action="store_true", help="""
    This flag needs to be set if the model changes somehow.
    (Ex: when we freeze the backend network and train the head with AdamOptimizer,
    and we want to unfreeze the backend network. Adam's vars will not be set for
    the backend network)
    """)
Example #3
    submission = common.Submission(sub_file)
    logger = logging.getLogger("predictor")
    logger.info(f"Using batch of {batch_size}")
    logger.info(f"Using {paralell_calls} paralell calls")

    tf.logging.set_verbosity(tf.logging.INFO)
    i = 0
    for i, predictions in enumerate(
            estimator.predict(input_fn, desired_pred), start=1):

        img_id = predictions[estimator.IMAGE_ID]
        scores = predictions[estimator.SCORES]
        labels = dir_dataset.vector_label((scores > 0.5).astype(int))
        submission.add_submission(img_id.decode(), labels)

        if i % 100 == 0:
            logger.info(f"Wrote {i} examples.")

    submission.end_sumbission()
    logger.info(f"Finished, wrote {i} examples.")


if __name__ == "__main__":

    logging.basicConfig(level=logging.INFO)
    logging.getLogger("tensorflow").propagate = False
    tf.logging.set_verbosity(tf.logging.ERROR)

    make_submission(
        batch_size=10, parallel_calls=20,
        sub_file=os.path.join(common.PathsJson().SUBMISSION_DIR,
                              "prediction.csv"))
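
As a small illustration of the thresholding step inside the prediction loop above: each score vector is cut at 0.5 to produce a binary indicator vector, which dir_dataset.vector_label then maps to label names. The scores below are made up for the demo.

import numpy as np

scores = np.array([0.91, 0.07, 0.64, 0.33])  # fabricated sigmoid outputs
binary = (scores > 0.5).astype(int)
print(binary)  # [1 0 1 0]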