def main(argv):
    """
    Parse the command line arguments and run the ranking pipeline.

    Parameters
    ----------
    argv : list of str
        Command line tokens handed to ``get_args`` unchanged.

    Notes
    -----
    The pipeline is constructed from the parsed arguments only and
    ``run()`` is called without parameters, so what the run does is
    decided entirely by those arguments.
    """
    pipeline = RankingPipeline(args=get_args(argv))
    pipeline.run()
def setUp(
    self,
    output_dir: str = OUTPUT_DIR,
    root_data_dir: str = ROOT_DATA_DIR,
    feature_config_fname: str = FEATURE_CONFIG_FNAME,
):
    """
    Prepare the fixture state shared by the tests.

    Stores the supplied paths on the instance, creates the output
    directory, seeds the random number generators, builds the default
    arguments, loads the model config and sets up a file logger.

    Parameters
    ----------
    output_dir : str
        Directory used for the temporary output; also assigned to the
        ``models_dir`` and ``logs_dir`` arguments.
    root_data_dir : str
        Stored on the instance as ``root_data_dir``.
    feature_config_fname : str
        Stored on the instance as ``feature_config_fname``.
    """
    seed = 123

    # Remember the fixture locations on the instance
    self.output_dir = output_dir
    self.root_data_dir = root_data_dir
    self.feature_config_fname = feature_config_fname

    # Temporary output directory, requested with clear_dir=True
    file_io.make_directory(self.output_dir, clear_dir=True)

    # Reset the keras session first, then pin every RNG to the same seed
    # so that repeated runs are comparable
    tf.keras.backend.clear_session()
    for seed_fn in (np.random.seed, tf.random.set_seed, random.seed):
        seed_fn(seed)

    # Default arguments, with models and logs redirected to the output dir
    self.args: Namespace = get_args([])
    self.args.models_dir = output_dir
    self.args.logs_dir = output_dir

    # Model config is read from the path carried by the arguments
    self.model_config = file_io.read_yaml(self.args.model_config)

    # File logger writing inside the logs directory
    log_path: str = os.path.join(self.args.logs_dir, "output_log.csv")
    self.logger = setup_logging(reset=True, file_name=log_path, log_to_file=True)
def create_parse_args(data_dir, feature_config, model_config, logs_dir, num_folds, use_testset_in_folds):
    """
    Build the parsed arguments for a k-fold run on csv data.

    Parameters
    ----------
    data_dir
        Value passed for ``--data_dir``.
    feature_config
        Value passed for ``--feature_config``.
    model_config
        Value passed for ``--model_config``.
    logs_dir
        Value passed for ``--logs_dir``.
    num_folds
        Number of folds; converted with ``str`` and passed for ``--kfold``.
    use_testset_in_folds
        Converted with ``str`` and passed for ``--include_testset_in_kfold``.

    Returns
    -------
    The object returned by ``get_args`` for the assembled argument list.
    """
    flag_value_pairs = (
        ("--data_dir", data_dir),
        ("--feature_config", feature_config),
        ("--kfold", str(num_folds)),
        ("--include_testset_in_kfold", str(use_testset_in_folds)),
        ("--run_id", "testing_kfold_cs"),
        ("--data_format", "csv"),
        ("--execution_mode", "train_inference_evaluate"),
        ("--num_epochs", "1"),
        ("--model_config", model_config),
        ("--batch_size", "4"),
        ("--logs_dir", logs_dir),
        ("--max_sequence_size", "25"),
        ("--train_pcent_split", "1.0"),
        ("--val_pcent_split", "-1"),
        ("--test_pcent_split", "-1"),
    )

    # Flatten the pairs into the flat token list the parser consumes,
    # keeping the flags in the order listed above
    argv = [token for pair in flag_value_pairs for token in pair]
    return get_args(argv)
def train_ml4ir(data_dir, feature_config, model_config, logs_dir):
    """
    Train and evaluate a ranking model through ml4ir's RankingPipeline.

    The run is configured with listwise loss and listwise scoring, the
    ``rank_one_listnet`` loss key, csv input, a batch size of 1, up to
    150 epochs and an early stopping patience of 25. ``MRR`` and
    ``categorical_accuracy`` are the requested metrics, with
    ``categorical_accuracy`` as the monitored one.

    Parameters
    ----------
    data_dir
        Value passed for ``--data_dir``.
    feature_config
        Value passed for ``--feature_config``.
    model_config
        Value passed for ``--model_config``.
    logs_dir
        Value passed for ``--logs_dir``.
    """
    # Each entry is a flag followed by its value(s); --metrics_keys is the
    # only flag that carries more than one value
    flag_groups = (
        ("--data_dir", data_dir),
        ("--feature_config", feature_config),
        ("--loss_type", "listwise"),
        ("--scoring_type", "listwise"),
        ("--run_id", "test_command_line"),
        ("--data_format", "csv"),
        ("--execution_mode", "train_evaluate"),
        ("--loss_key", "rank_one_listnet"),
        ("--num_epochs", "150"),
        ("--model_config", model_config),
        ("--batch_size", "1"),
        ("--logs_dir", logs_dir),
        ("--max_sequence_size", "25"),
        ("--train_pcent_split", "1.0"),
        ("--val_pcent_split", "-1"),
        ("--test_pcent_split", "-1"),
        ("--early_stopping_patience", "25"),
        ("--metrics_keys", "MRR", "categorical_accuracy"),
        ("--monitor_metric", "categorical_accuracy"),
    )

    # Flatten into the flat token list, preserving the order above
    argv = [token for group in flag_groups for token in group]

    pipeline = RankingPipeline(args=get_args(argv))
    pipeline.run()