from utils.pipeline import Pipeline

# NOTE: the imports of TrainModelPipeline, DEFAULT_TEST_CONFIG and
# PlannerTranslatePipeline are truncated in the source.


def model_pipeline(train_config):
    pipeline = Pipeline()
    pipeline.enqueue(
        "train-model", "Train Model",
        TrainModelPipeline.mutate({
            "train-config": train_config,
            "test-config": DEFAULT_TEST_CONFIG
        }))
    pipeline.enqueue(
        "translate-naive", "Translate Naive Plans",
        PlannerTranslatePipeline.mutate({"planner-name": "naive"}))
    pipeline.enqueue(
        "translate-neural", "Translate Neural Plans",
        PlannerTranslatePipeline.mutate({"planner-name": "neural"}))
    return pipeline
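# ---------------------------------------------------------------------------
# `Pipeline` itself (utils/pipeline.py) is not shown in this section. The
# sketch below is a minimal stand-in capturing the semantics the files here
# rely on, inferred from usage only — it is NOT the real implementation.
# Steps receive (f, x): `f` holds this pipeline's earlier step results by
# name, `x` holds the parent's results merged with this pipeline's config;
# a step named "out" is what a parent pipeline sees as this step's result.
# ---------------------------------------------------------------------------
class PipelineSketch:
    def __init__(self, config=None):
        self.config = dict(config or {})
        self.steps = []  # (name, description, step, ext) tuples

    def enqueue(self, name, description, step, ext=None):
        self.steps.append((name, description, step, ext))

    def mutate(self, config):
        # Copy the steps enqueued so far and merge in extra config;
        # steps enqueued later on the original do not affect the copy.
        copy = PipelineSketch({**self.config, **config})
        copy.steps = list(self.steps)
        return copy

    def run(self, parent_context=None):
        x = {**(parent_context or {}), **self.config}
        f = {}
        for name, description, step, ext in self.steps:
            if isinstance(step, PipelineSketch):
                f[name] = step.run({**x, **f})  # nested pipeline sees our results
            else:
                f[name] = step(f, x)  # plain (f, x) -> result callable
        return f.get("out", f)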
from utils.pipeline import Pipeline

# CoverageEvaluationPipeline = Pipeline()
# CoverageEvaluationPipeline.enqueue("plan-all", "Plan all & score on test set",
#                                    lambda f, x: x["test-corpus"].copy().exhaustive_plan(x["train-planner"]))
# CoverageEvaluationPipeline.enqueue("print", "Print stuff",
#                                    lambda f, x: "\n".join([str(len(d.graph.edges)) + " - " + str(len(d.plans))
#                                                            for d in f["plan-all"].data]),
#                                    ext="txt")

EvaluationPipeline = Pipeline()
EvaluationPipeline.enqueue("bleu", "Evaluate test reader",
                           lambda f, x: x["translate"].evaluate())
# EvaluationPipeline.enqueue("coverage", "Coverage evaluation", CoverageEvaluationPipeline)
from data.reader import DataReader, DataSetType
from planner.naive_planner import NaivePlanner  # assumed path; file header is truncated
from planner.planner import Planner  # assumed path; file header is truncated
from scorer.global_direction import GlobalDirectionExpert
from scorer.product_of_experts import WeightedProductOfExperts
from scorer.relation_direction import RelationDirectionExpert
from scorer.relation_transitions import RelationTransitionsExpert
from scorer.splitting_tendencies import SplittingTendenciesExpert
from utils.pipeline import Pipeline

# NOTE: the imports of the sub-pipelines enqueued below (TrainingPreProcessPipeline,
# TrainPlannerPipeline, TrainModelPipeline, TestingPreProcessPipeline,
# TranslatePipeline, EvaluationPipeline) are truncated in the source.


class Config:
    def __init__(self, reader: DataReader, planner: Planner,
                 test_reader: DataReader = None):
        self.reader = {
            DataSetType.TRAIN: reader,
            DataSetType.DEV: reader,
            DataSetType.TEST: test_reader if test_reader else reader,
        }
        self.planner = planner


MainPipeline = Pipeline()
MainPipeline.enqueue("pre-process", "Pre-process training data", TrainingPreProcessPipeline)
MainPipeline.enqueue("train-planner", "Train Planner", TrainPlannerPipeline)
MainPipeline.enqueue("train-model", "Train Model", TrainModelPipeline)
MainPipeline.enqueue("test-corpus", "Pre-process test data", TestingPreProcessPipeline)
MainPipeline.enqueue("translate", "Translate Test", TranslatePipeline)
MainPipeline.enqueue("evaluate", "Evaluate Translations", EvaluationPipeline)

if __name__ == "__main__":
    naive_planner = NaivePlanner(WeightedProductOfExperts([
        RelationDirectionExpert,
        GlobalDirectionExpert,
        SplittingTendenciesExpert,
        RelationTransitionsExpert
    ]))
    # neural_planner = NeuralPlanner()
    # combined_planner = CombinedPlanner((neural_planner, naive_planner))
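    # Hypothetical continuation (the source is cut off here): thread a Config
    # into the pipeline via mutate() and run it. `run` follows the
    # PipelineSketch stand-in earlier in this section; the real utils.pipeline
    # entry point may be named differently.
    # config = Config(reader=WebNLGDataReader, planner=naive_planner)
    # MainPipeline.mutate({"config": config}).run()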
"feat_vec_size": 10, "feat_merge": "concat", "layers": 3, "copy_attn": None, "position_encoding": None } } DEFAULT_TEST_CONFIG = {"beam_size": 5, "find_best": True} TrainModelPipeline = Pipeline({ "train-config": DEFAULT_TRAIN_CONFIG, "test-config": DEFAULT_TEST_CONFIG }) TrainModelPipeline.enqueue( "model", "Initialize OpenNMT", lambda f, x: OpenNMTModelRunner(x["pre-process"]["train"], x["pre-process"] ["dev"], x["train-config"]["features"])) TrainModelPipeline.enqueue("expose", "Expose Train Data", lambda f, x: f["model"].expose_train(), ext="txt") TrainModelPipeline.enqueue("pre-process", "Pre-process Train and Dev", lambda f, x: f["model"].pre_process()) TrainModelPipeline.enqueue( "train", "Train model", lambda f, x: f["model"].train( f["pre-process"], f["train-config"]["train"])) TrainModelPipeline.enqueue( "find-best", "Find best model", lambda f, x: f["model"].find_best(f["train"], f["test-config"])) TrainModelPipeline.enqueue("out", "Output a model instance", lambda f, x: f["find-best"])
from collections import defaultdict

from utils.pipeline import Pipeline


def unique_plans_outputs(reader):
    # Map each unique plan to its (single) hypothesis and all reference texts.
    plan_hyp_refs = defaultdict(lambda: ["", []])
    for d in reader.data:
        plan_hyp_refs[d.plan][0] = d.hyp
        plan_hyp_refs[d.plan][1].append(d.text)
    return dict(plan_hyp_refs)


def plans_output_single_file(plan_hyp_refs):
    return ["\n".join([plan, hyp, "---"] + refs)
            for plan, (hyp, refs) in plan_hyp_refs.items()]


TranslatePipeline = Pipeline()
TranslatePipeline.enqueue("translate", "Translate all plans",
                          lambda f, x: x["test-corpus"].copy().translate_plans(x["train-model"], x["train-planner"]))
TranslatePipeline.enqueue("post-process", "Post-process translated sentences",
                          lambda f, x: f["translate"].copy().post_process(x["train-reg"]))
TranslatePipeline.enqueue("plans-out", "Create a dictionary of outputs",
                          lambda f, x: unique_plans_outputs(f["post-process"]))
TranslatePipeline.enqueue("review", "Create hypothesis-references review file",
                          lambda f, x: "\n\n".join(plans_output_single_file(f["plans-out"])),
                          ext="txt")
TranslatePipeline.enqueue("hypothesis", "Create hypothesis file",
                          lambda f, x: "\n".join([hyp for plan, (hyp, refs) in f["plans-out"].items()]),
                          ext="txt")
TranslatePipeline.enqueue("references", "Create references file",
                          lambda f, x: "\n\n".join(["\n".join(refs) for plan, (hyp, refs) in f["plans-out"].items()]),
                          ext="txt")
TranslatePipeline.enqueue("out", "Expose output for parent",
                          lambda f, _: f["post-process"].copy())
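# Toy check for unique_plans_outputs: two datums sharing a plan collapse into
# one hypothesis with both references. `Datum`/`FakeReader` are stand-ins for
# the real data classes (assumed fields: .plan, .hyp, .text).
if __name__ == "__main__":
    from collections import namedtuple

    Datum = namedtuple("Datum", ["plan", "hyp", "text"])
    FakeReader = namedtuple("FakeReader", ["data"])

    reader = FakeReader(data=[
        Datum("PLAN_A", "a city in France", "Paris is a city in France."),
        Datum("PLAN_A", "a city in France", "Paris, a French city."),
    ])
    assert unique_plans_outputs(reader) == {
        "PLAN_A": ["a city in France",
                   ["Paris is a city in France.", "Paris, a French city."]]
    }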
import json

from data.WebNLG.reader import WebNLGDataReader
from data.reader import DataSetType
from utils.error_bar import error_bar
from utils.pipeline import Pipeline, ParallelPipeline

CorpusPreProcessPipeline = Pipeline()
CorpusPreProcessPipeline.enqueue(
    "corpus", "Read Corpus",
    lambda f, x: x["config"].reader[f["set"]](f["set"]))
CorpusPreProcessPipeline.enqueue(
    "graphify", "RDF to Graph",
    lambda f, _: f["corpus"].copy().generate_graphs())
CorpusPreProcessPipeline.enqueue(
    "spelling", "Fix Spelling",
    lambda f, _: f["graphify"].copy().fix_spelling())
CorpusPreProcessPipeline.enqueue(
    "entities", "Describe entities",
    lambda f, _: f["spelling"].copy().describe_entities())

# Test does not need matching entities or plans
TestCorpusPreProcessPipeline = CorpusPreProcessPipeline.mutate({})

CorpusPreProcessPipeline.enqueue(
    "match-ents", "Match Entities",
    lambda f, _: f["entities"].copy().match_entities())
CorpusPreProcessPipeline.enqueue(
    "match-plans", "Match Plans",
    lambda f, _: f["match-ents"].copy().match_plans())
CorpusPreProcessPipeline.enqueue(
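# Note on the mutate({}) branch point above: the test pipeline snapshots the
# four steps enqueued so far, so match-ents / match-plans (enqueued after the
# snapshot) run only for the training corpus. Mini-demo using the
# PipelineSketch stand-in from earlier in this section (inferred behavior):
#
#   base = PipelineSketch()
#   base.enqueue("a", "Step A", lambda f, x: "A")
#   branch = base.mutate({})                       # sees only step "a"
#   base.enqueue("b", "Step B", lambda f, x: "B")  # absent from `branch`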
from utils.pipeline import Pipeline

EvaluationPipeline = Pipeline()
EvaluationPipeline.enqueue("evaluate", "Evaluate test reader",
                           lambda f, x: x["translate"].evaluate())
EvaluationPipeline.enqueue("out", "Expose output for parent",
                           lambda f, _: f["evaluate"].copy())
from utils.pipeline import Pipeline

TrainPlannerPipeline = Pipeline()
TrainPlannerPipeline.enqueue("planner", "Learn planner",
                             lambda _, x: x["config"].planner.learn(x["pre-process"]["train"],
                                                                    x["pre-process"]["dev"]))
TrainPlannerPipeline.enqueue("out", "Expose the planner",
                             lambda f, _: f["planner"])
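# Assumed planner contract, inferred from the single call above: `learn`
# consumes the train/dev readers and returns a trained planner object.
# Illustrative stub only; the real planner classes live elsewhere in the repo.
class PlannerContractStub:
    def learn(self, train_reader, dev_reader):
        # fit the planner (e.g. its scoring experts) on the corpora
        return self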
from data.WebNLG.reader import WebNLGDataReader
from scorer.global_direction import GlobalDirectionExpert
from scorer.product_of_experts import WeightedProductOfExperts
from scorer.relation_direction import RelationDirectionExpert
from scorer.relation_transitions import RelationTransitionsExpert
from scorer.splitting_tendencies import SplittingTendenciesExpert
from utils.pipeline import Pipeline

# NOTE: the imports of NaivePlanner, NeuralPlanner, Config and the
# sub-pipelines used below are truncated in the source.

naive_planner = NaivePlanner(
    WeightedProductOfExperts([
        RelationDirectionExpert,
        GlobalDirectionExpert,
        SplittingTendenciesExpert,
        RelationTransitionsExpert
    ]))
neural_planner = NeuralPlanner()

PlanPipeline = Pipeline()
PlanPipeline.enqueue("train-planner", "Train Planner", TrainPlannerPipeline)
PlanPipeline.enqueue("test-corpus", "Pre-process test data", TestingPreProcessPipeline)

ExperimentsPipeline = Pipeline()
ExperimentsPipeline.enqueue("pre-process", "Pre-process training data", TrainingPreProcessPipeline)

# Train all planners
# # Naive Planner
ExperimentsPipeline.enqueue(
    "naive-planner", "Train Naive Planner",
    PlanPipeline.mutate(
        {"config": Config(reader=WebNLGDataReader, planner=naive_planner)}))
# # Neural Planner
ExperimentsPipeline.enqueue(
from utils.pipeline import Pipeline

REGPipeline = Pipeline()
REGPipeline.enqueue(
    "reg", "Learn REG",
    lambda _, x: x["config"].reg(x["pre-process"]["train"],
                                 x["pre-process"]["dev"]))
REGPipeline.enqueue("out", "Expose the reg", lambda f, _: f["reg"])
from utils.pipeline import Pipeline


def unique_plans_outputs(reader):
    # Deduplicate by plan; the last hypothesis seen for a plan wins.
    mapper = {d.plan: d.hyp for d in reader.data}
    print(len(mapper))
    return list(mapper.values())


TranslatePipeline = Pipeline()
TranslatePipeline.enqueue(
    "translate", "Translate all plans",
    lambda f, x: x["test-corpus"].copy().translate_plans(x["train-model"]))
TranslatePipeline.enqueue("post-process", "Post-process translated sentences",
                          lambda f, _: f["translate"].copy().post_process())
TranslatePipeline.enqueue(
    "hypothesis", "Create hypothesis file",
    lambda f, x: "\n".join(unique_plans_outputs(f["post-process"])))
TranslatePipeline.enqueue("out", "Expose output for parent",
                          lambda f, _: f["post-process"].copy())
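# Toy check for this file's unique_plans_outputs: duplicates collapse to one
# hypothesis per unique plan, in first-seen order. `Datum`/`FakeReader` are
# stand-ins for the real data classes (assumed fields: .plan, .hyp).
if __name__ == "__main__":
    from collections import namedtuple

    Datum = namedtuple("Datum", ["plan", "hyp"])
    FakeReader = namedtuple("FakeReader", ["data"])

    reader = FakeReader(data=[Datum("P1", "h1"), Datum("P1", "h1"), Datum("P2", "h2")])
    assert unique_plans_outputs(reader) == ["h1", "h2"]  # also prints "2"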