def main(args):
    """Run the single GLUE workflow step selected by the flags on ``args``.

    The flags are checked in this order and only the first truthy one is
    acted on: ``do_eda``, ``do_submit``, ``do_train``, ``do_eval``,
    ``do_predict``, ``do_experiment``.  The first two are handed straight to
    the module-level ``do_eda`` / ``do_submit``.  In every other case an
    ``AppTrainer`` is constructed first, even when none of the remaining
    flags is set.

    NOTE(review): this file also shows another top-level ``main(args)``; if
    both live in the same module the later definition replaces this one.
    Confirm that is intended.

    Args:
        args: Object carrying the flags above plus whatever the selected
            step reads (``best_model_path``, ``tracking_uri``,
            ``experiment_name``, ``local_id``).  The eval and predict steps
            overwrite ``args.model_path`` with ``args.best_model_path``.

    Returns:
        None.
    """
    # Steps that need no trainer are settled up front.
    if args.do_eda:
        do_eda(args)
        return
    if args.do_submit:
        do_submit(args)
        return

    class AppTrainer(GlueTrainer):
        """GlueTrainer subclass that always passes ``build_model=None``."""

        def __init__(self, args, glue_labels):
            super().__init__(args, glue_labels, build_model=None)

        # NOTE: an on_predict_end(self, args, test_dataset) override was
        # sketched here in the past but is disabled.

    trainer = AppTrainer(args, glue_labels)

    def run_training(args):
        """Load the train/validation split and train on it."""
        train_split, val_split = load_train_val_examples(args)
        trainer.train(args, train_split, val_split)

    def run_evaluation(args):
        """Evaluate the model found at ``best_model_path`` on the validation split."""
        args.model_path = args.best_model_path
        _unused_train, val_split = load_train_val_examples(args)
        best_model = load_model(args)
        trainer.evaluate(args, best_model, val_split)

    def run_prediction(args):
        """Predict on the test examples and return the saved predictions file."""
        args.model_path = args.best_model_path
        test_split = load_test_examples(args)
        best_model = load_model(args)
        trainer.predict(args, best_model, test_split)
        return save_glue_preds(args, trainer.pred_results, test_split)

    def run_experiment(args):
        """Train, predict and submit inside one MLflow run."""
        if args.tracking_uri:
            mlflow.set_tracking_uri(args.tracking_uri)
        mlflow.set_experiment(args.experiment_name)

        with mlflow.start_run(run_name=f"{args.local_id}"):
            log_global_params(args, experiment_params)
            run_training(args)
            run_prediction(args)
            do_submit(args)

    # Flag name -> step, in priority order; the first truthy flag wins.
    trainer_steps = (
        ("do_train", run_training),
        ("do_eval", run_evaluation),
        ("do_predict", run_prediction),
        ("do_experiment", run_experiment),
    )
    for flag_name, step in trainer_steps:
        if getattr(args, flag_name):
            step(args)
            break
def main(args):
    """Run the single NER workflow step selected by the flags on ``args``.

    The flags are checked in this order and only the first truthy one is
    acted on: ``do_eda``, ``do_submit``, ``to_train_poplar``,
    ``to_reviews_poplar``, ``do_train``, ``do_eval``, ``do_predict``,
    ``do_experiment``.  The first four need no trainer.  In every other case
    an ``AppTrainer`` is constructed first, even when none of the remaining
    flags is set.

    NOTE(review): this file also shows another top-level ``main(args)``; if
    both live in the same module the later definition is the one that stays
    bound.  Confirm that is intended.

    Args:
        args: Object carrying the flags above plus whatever the selected
            step reads (``train_file``, ``test_file``, ``start_page``,
            ``max_pages``, ``best_model_path``, ``tracking_uri``,
            ``experiment_name``, ``local_id``).  The eval and predict steps
            overwrite ``args.model_path`` with ``args.best_model_path``.

    Returns:
        None.
    """

    def submit(args):
        """Build the submission; shared by the submit and experiment steps."""
        generate_submission(args)

    # ---- Steps that do not need a trainer ----
    if args.do_eda:
        show_ner_datainfo(ner_labels, train_data_generator, args.train_file,
                          test_data_generator, args.test_file)
        return

    if args.do_submit:
        submit(args)
        return

    if args.to_train_poplar:
        # Imported lazily so theta.modeling is only touched on this path.
        from theta.modeling import to_train_poplar
        to_train_poplar(
            args,
            train_data_generator,
            ner_labels=ner_labels,
            ner_connections=[],
            start_page=args.start_page,
            max_pages=args.max_pages,
        )
        return

    if args.to_reviews_poplar:
        from theta.modeling import to_reviews_poplar
        to_reviews_poplar(
            args,
            ner_labels=ner_labels,
            ner_connections=[],
            start_page=args.start_page,
            max_pages=args.max_pages,
        )
        return

    # ---- Steps that need a trainer ----
    # NOTE: a disabled variant used to pick NerTrainer from
    # theta.modeling.ner_span when args.ner_type == 'span' and from
    # theta.modeling.ner otherwise; the NerTrainer already in scope is used.
    class AppTrainer(NerTrainer):
        """NerTrainer subclass that always passes ``build_model=None``."""

        def __init__(self, args, ner_labels):
            super().__init__(args, ner_labels, build_model=None)

        # NOTE: an on_predict_end(self, args, test_dataset) override was
        # sketched here in the past but is disabled.

    trainer = AppTrainer(args, ner_labels)

    def run_training(args):
        """Load the train/validation split and train on it."""
        train_split, val_split = load_train_val_examples(args)
        trainer.train(args, train_split, val_split)

    def run_evaluation(args):
        """Evaluate the model found at ``best_model_path`` on the validation split."""
        args.model_path = args.best_model_path
        _unused_train, val_split = load_train_val_examples(args)
        best_model = load_model(args)
        trainer.evaluate(args, best_model, val_split)

    def run_prediction(args):
        """Predict on the test examples and return both saved output files."""
        args.model_path = args.best_model_path
        test_split = load_test_examples(args)
        best_model = load_model(args)
        trainer.predict(args, best_model, test_split)
        reviews_path, mentions_path = save_ner_preds(
            args, trainer.pred_results, test_split)
        return reviews_path, mentions_path

    def run_experiment(args):
        """Train, predict and submit inside one MLflow run."""
        if args.tracking_uri:
            mlflow.set_tracking_uri(args.tracking_uri)
        mlflow.set_experiment(args.experiment_name)

        with mlflow.start_run(run_name=f"{args.local_id}"):
            log_global_params(args, experiment_params)
            run_training(args)
            run_prediction(args)
            submit(args)

    # Flag name -> step, in priority order; the first truthy flag wins.
    trainer_steps = (
        ("do_train", run_training),
        ("do_eval", run_evaluation),
        ("do_predict", run_prediction),
        ("do_experiment", run_experiment),
    )
    for flag_name, step in trainer_steps:
        if getattr(args, flag_name):
            step(args)
            break