import logging
import os

import numpy as np
import tensorflow as tf

import common
# NOTE: the source modules behind these two aliases are assumptions; adjust
# the import paths to the repo's actual layout.
import dataset as dataset_module
import model as model_module


def make_submission(batch_size: int, parallel_calls: int, sub_file: str):

    estimator = model_module.ProteinEstimator()
    model_config = common.TFHubModels(common.ConfigurationJson().TF_HUB_MODULE)
    dir_dataset = dataset_module.Dataset(common.PathsJson().DIR_TRAIN)
    img_ids_paths = dir_dataset.scan_dir()

    def g():
        # Yield (img_id, paths) pairs for Dataset.from_generator.
        for items in img_ids_paths.items():
            yield items

    def input_fn():

        def _map_fn(img_id, paths):
            img = dataset_module.tf_load_image(paths)
            # Center-crop to the TF-Hub module's expected input size, keep
            # the first three channels, and scale pixel values to [0, 1].
            img = tf.image.crop_to_bounding_box(
                img,
                tf.to_int32(
                    (tf.shape(img)[0] - model_config.expected_image_size[0]) / 2),
                tf.to_int32(
                    (tf.shape(img)[1] - model_config.expected_image_size[1]) / 2),
                model_config.expected_image_size[0],
                model_config.expected_image_size[1],
            )[:, :, 0:3] / 255

            return {
                dataset_module.TFRecordKeys.ID: img_id,
                dataset_module.TFRecordKeys.DECODED: img,
            }

        dataset = tf.data.Dataset.from_generator(
                g, (tf.string, tf.string), ([], [None])) \
            .apply(tf.data.experimental.map_and_batch(
                _map_fn, batch_size, num_parallel_calls=parallel_calls)) \
            .prefetch(None)

        return dataset

    desired_pred = [estimator.IMAGE_ID, estimator.SCORES]
    submission = common.Submission(sub_file)
    logger = logging.getLogger("predictor")

    logger.info(f"Using batch of {batch_size}")
    logger.info(f"Using {parallel_calls} parallel calls")
    tf.logging.set_verbosity(tf.logging.INFO)

    i = 0
    for i, predictions in enumerate(estimator.predict(input_fn, desired_pred), 1):

        img_id = predictions[estimator.IMAGE_ID]
        scores = predictions[estimator.SCORES]

        # Threshold the per-class scores at 0.5 and map the resulting
        # multi-hot vector to label names.
        labels = dir_dataset.vector_label((scores > 0.5).astype(np.int))
        submission.add_submission(img_id.decode(), labels)

        if i % 100 == 0:
            logger.info(f"Wrote {i} examples.")

    submission.end_sumbission()
    logger.info(f"Finished, wrote {i} examples.")
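# A minimal sketch of the center-crop performed in _map_fn above, assuming
# TF 1.x (matching the tf.to_int32 / tf.logging calls in this repo). The
# 512x512 dummy image and 299x299 target size are illustrative values, not
# the repo's actual configuration.
def _center_crop_demo():
    img = tf.zeros([512, 512, 4], dtype=tf.uint8)  # dummy 4-channel image
    target_h, target_w = 299, 299

    cropped = tf.image.crop_to_bounding_box(
        img,
        tf.to_int32((tf.shape(img)[0] - target_h) / 2),  # top offset
        tf.to_int32((tf.shape(img)[1] - target_w) / 2),  # left offset
        target_h, target_w,
    )[:, :, 0:3]  # keep the first three channels, drop the fourth

    with tf.Session() as sess:
        print(sess.run(tf.shape(cropped)))  # -> [299 299 3]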
    # Tail of the training routine: hand off to Estimator's train/eval loop.
    tflogger.setLevel(logging.INFO)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)


if __name__ == "__main__":

    import argparse
    import os

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("train_script")
    logging.getLogger("tensorflow").setLevel(tf.logging.ERROR)

    config = common.ConfigurationJson()
    paths = common.PathsJson()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    parser = argparse.ArgumentParser(
        description="Train the model on parsed data.")

    parser.add_argument("--override", action="store_true", help="""
    Overwrite an existing saved_model directory.
    """)
    parser.add_argument("--warm_start", action="store_true", help="""
    Set this flag whenever the model changes in a way that invalidates the
    optimizer state. (Ex: we freeze the backend network and train the head
    with AdamOptimizer; if we later unfreeze the backend network, Adam's
    variables will not be set for it.)
    """)
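# The fragment above assumes `estimator`, `train_spec`, and `eval_spec` were
# built earlier in the training routine. A minimal, self-contained sketch of
# that wiring under TF 1.x follows; the toy model_fn/input_fn, model_dir, and
# step counts are placeholders, not the repo's actual implementation.
def _toy_input_fn():
    features = {"x": tf.random_uniform([8, 4])}
    labels = tf.zeros([8], dtype=tf.int32)
    return tf.data.Dataset.from_tensors((features, labels)).repeat()

def _toy_model_fn(features, labels, mode):
    logits = tf.layers.dense(features["x"], 2)
    loss = tf.losses.sparse_softmax_cross_entropy(labels, logits)
    train_op = tf.train.AdamOptimizer().minimize(
        loss, global_step=tf.train.get_or_create_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

def _toy_train_and_evaluate():
    estimator = tf.estimator.Estimator(_toy_model_fn, model_dir="/tmp/toy_model")
    train_spec = tf.estimator.TrainSpec(_toy_input_fn, max_steps=100)
    eval_spec = tf.estimator.EvalSpec(_toy_input_fn, steps=10)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)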
if __name__ == "__main__":

    logging.basicConfig(level=logging.INFO)

    # Keep TensorFlow's own logging quiet so only the predictor's
    # progress messages show up.
    logging.getLogger("tensorflow").propagate = False
    tf.logging.set_verbosity(tf.logging.ERROR)

    make_submission(
        10, 20,
        os.path.join(common.PathsJson().SUBMISSION_DIR, "prediction.csv"))
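# A quick NumPy illustration (hypothetical scores) of the 0.5 thresholding
# used in the prediction loop above: per-class scores become the multi-hot
# vector that dir_dataset.vector_label() maps to label names.
def _threshold_demo():
    scores = np.array([0.91, 0.08, 0.67, 0.44])
    multi_hot = (scores > 0.5).astype(np.int)  # -> array([1, 0, 1, 0])
    return multi_hot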