Esempio n. 1
0
def model_pipeline(train_config):
    """Assemble a pipeline that trains a model and translates with two planners.

    Three stages are enqueued, in this order:

    1. ``"train-model"`` -- ``TrainModelPipeline`` mutated so that its
       ``"train-config"`` is ``train_config`` and its ``"test-config"`` is
       ``DEFAULT_TEST_CONFIG``.
    2. ``"translate-naive"`` -- ``PlannerTranslatePipeline`` mutated with
       ``"planner-name"`` set to ``"naive"``.
    3. ``"translate-neural"`` -- the same, with ``"planner-name"`` set to
       ``"neural"``.

    :param train_config: value stored under ``"train-config"`` for the
        training stage.
    :return: the newly built ``Pipeline``.
    """
    stages = Pipeline()

    training = TrainModelPipeline.mutate({
        "train-config": train_config,
        "test-config": DEFAULT_TEST_CONFIG,
    })
    stages.enqueue("train-model", "Train Model", training)

    # One translation stage per planner, enqueued naive first, then neural.
    translations = (
        ("translate-naive", "Translate Naive Plans", "naive"),
        ("translate-neural", "Translate Neural Plans", "neural"),
    )
    for key, title, planner_name in translations:
        translate = PlannerTranslatePipeline.mutate({"planner-name": planner_name})
        stages.enqueue(key, title, translate)

    return stages
Esempio n. 2
0
from utils.pipeline import Pipeline

# CoverageEvaluationPipeline = Pipeline()
# CoverageEvaluationPipeline.enqueue("plan-all", "Plan all & score on test set",
#                                    lambda f, x: x["test-corpus"].copy().exhaustive_plan(x["train-planner"]))
# CoverageEvaluationPipeline.enqueue("print", "Print stuff",
#                                    lambda f, x: "\n".join([str(len(d.graph.edges)) + " - " + str(len(d.plans)) for d in f["plan-all"].data]), ext="txt")

# Pipeline with a single stage, "bleu", which calls ``evaluate()`` on the
# "translate" entry of the stage callable's second argument.
# NOTE(review): judging from usage across the pipelines, the first argument
# holds this pipeline's own earlier stage outputs and the second holds values
# supplied from outside -- confirm against ``Pipeline``.
EvaluationPipeline = Pipeline()
EvaluationPipeline.enqueue(
    "bleu",
    "Evaluate test reader",
    lambda _, parent: parent["translate"].evaluate(),
)
# EvaluationPipeline.enqueue("coverage", "Coverage evaluation", CoverageEvaluationPipeline)
Esempio n. 3
0
from scorer.splitting_tendencies import SplittingTendenciesExpert
from utils.pipeline import Pipeline


class Config:
    """Holds the reader to use for each data set type, plus the planner.

    ``self.reader`` maps ``DataSetType.TRAIN`` and ``DataSetType.DEV`` to
    ``reader`` and ``DataSetType.TEST`` to ``test_reader``; ``self.planner``
    is the planner passed in.
    """

    def __init__(self, reader: DataReader, planner: Planner, test_reader: DataReader = None):
        """Store the readers and the planner.

        :param reader: reader used for the train and dev sets.
        :param planner: planner kept on the instance unchanged.
        :param test_reader: reader used for the test set; any falsy value
            (including the default ``None``) falls back to ``reader``.
        """
        readers = {split: reader for split in (DataSetType.TRAIN, DataSetType.DEV)}
        readers[DataSetType.TEST] = test_reader or reader

        self.reader = readers
        self.planner = planner


# Top-level pipeline. Each table row is (key, title, sub-pipeline) and the
# rows are enqueued in exactly the order listed.
MainPipeline = Pipeline()
for _stage in (
        ("pre-process", "Pre-process training data", TrainingPreProcessPipeline),
        ("train-planner", "Train Planner", TrainPlannerPipeline),
        ("train-model", "Train Model", TrainModelPipeline),
        ("test-corpus", "Pre-process test data", TestingPreProcessPipeline),
        ("translate", "Translate Test", TranslatePipeline),
        ("evaluate", "Evaluate Translations", EvaluationPipeline),
):
    MainPipeline.enqueue(*_stage)
del _stage  # keep the loop variable out of the module namespace

# Script entry point: only runs when this module is executed directly.
if __name__ == "__main__":
    # Naive planner built on a weighted product of the four expert classes
    # listed below (the classes themselves are passed, not instances).
    naive_planner = NaivePlanner(WeightedProductOfExperts([
        RelationDirectionExpert,
        GlobalDirectionExpert,
        SplittingTendenciesExpert,
        RelationTransitionsExpert
    ]))
    # Alternative planners, currently disabled:
    # neural_planner = NeuralPlanner()
    # combined_planner = CombinedPlanner((neural_planner, naive_planner))
Esempio n. 4
0
        "feat_vec_size": 10,
        "feat_merge": "concat",
        "layers": 3,
        "copy_attn": None,
        "position_encoding": None
    }
}

# Default "test-config"; the "find-best" stage below hands it to
# ``find_best``.
DEFAULT_TEST_CONFIG = dict(beam_size=5, find_best=True)

# Model-training pipeline, created with default train and test
# configurations.
# NOTE(review): judging from usage, a stage callable's first argument holds
# this pipeline's own earlier outputs and its second holds values supplied
# from outside -- confirm against ``Pipeline``.
TrainModelPipeline = Pipeline(
    {"train-config": DEFAULT_TRAIN_CONFIG, "test-config": DEFAULT_TEST_CONFIG})

# Build the OpenNMT runner from the externally supplied train/dev data and
# the "features" entry of the train config.
TrainModelPipeline.enqueue(
    "model",
    "Initialize OpenNMT",
    lambda _, parent: OpenNMTModelRunner(
        parent["pre-process"]["train"],
        parent["pre-process"]["dev"],
        parent["train-config"]["features"]))

# ``ext`` is presumably the extension used when the stage output is stored
# -- confirm in ``Pipeline.enqueue``.
TrainModelPipeline.enqueue(
    "expose", "Expose Train Data",
    lambda own, _: own["model"].expose_train(),
    ext="txt")

TrainModelPipeline.enqueue(
    "pre-process", "Pre-process Train and Dev",
    lambda own, _: own["model"].pre_process())

# NOTE(review): "train-config" is read from the first argument here, but from
# the second argument in the "model" stage above -- confirm both resolve.
TrainModelPipeline.enqueue(
    "train", "Train model",
    lambda own, _: own["model"].train(
        own["pre-process"], own["train-config"]["train"]))

TrainModelPipeline.enqueue(
    "find-best", "Find best model",
    lambda own, _: own["model"].find_best(own["train"], own["test-config"]))

# The final stage re-exposes the "find-best" result under the key "out".
TrainModelPipeline.enqueue(
    "out", "Output a model instance",
    lambda own, _: own["find-best"])
Esempio n. 5
0
def unique_plans_outputs(reader):
    """Group the entries of ``reader.data`` by their plan.

    :param reader: object whose ``data`` attribute is an iterable of items
        exposing ``plan``, ``hyp`` and ``text`` attributes.
    :return: a plain ``dict`` mapping each plan to a two-item list
        ``[hyp, refs]``. ``hyp`` is the hypothesis of the *last* item seen
        with that plan, and ``refs`` lists the ``text`` of every item with
        that plan, in encounter order.
    """
    grouped = {}
    for datum in reader.data:
        entry = grouped.setdefault(datum.plan, ["", []])
        entry[0] = datum.hyp  # later items overwrite the hypothesis
        entry[1].append(datum.text)
    return grouped


def plans_output_single_file(plan_hyp_refs):
    """Render one text section per plan.

    :param plan_hyp_refs: mapping of plan -> ``(hyp, refs)`` where ``refs``
        is a list of strings.
    :return: list of strings, one per plan in mapping order. Each consists of
        the plan, the hypothesis, a ``---`` separator and then every
        reference, joined by newlines.
    """
    sections = []
    for plan, (hyp, refs) in plan_hyp_refs.items():
        header = [plan, hyp, "---"]
        sections.append("\n".join(header + refs))
    return sections


# Translation pipeline: translate the test corpus, post-process it, then
# derive several text outputs from the per-plan grouping.
# NOTE(review): judging from usage, a stage callable's first argument holds
# this pipeline's own earlier outputs and its second holds values supplied
# from outside -- confirm against ``Pipeline``.
TranslatePipeline = Pipeline()

# Works on a copy of the externally supplied test corpus.
TranslatePipeline.enqueue(
    "translate", "Translate all plans",
    lambda _, parent: parent["test-corpus"].copy().translate_plans(
        parent["train-model"], parent["train-planner"]))

TranslatePipeline.enqueue(
    "post-process", "Post-process translated sentences",
    lambda own, parent: own["translate"].copy().post_process(parent["train-reg"]))

# plan -> [hyp, refs] dictionary that every text output below is built from.
TranslatePipeline.enqueue(
    "plans-out", "Create a dictionary of outputs",
    lambda own, _: unique_plans_outputs(own["post-process"]))

# One section per plan (plan, hypothesis, "---", references), with a blank
# line between sections.
TranslatePipeline.enqueue(
    "review", "Create hypothesis-references review file",
    lambda own, _: "\n\n".join(plans_output_single_file(own["plans-out"])),
    ext="txt")

# One hypothesis per line.
TranslatePipeline.enqueue(
    "hypothesis", "Create hypothesis file",
    lambda own, _: "\n".join(hyp for hyp, _refs in own["plans-out"].values()),
    ext="txt")

# The references of each plan on consecutive lines, with a blank line
# between plans.
TranslatePipeline.enqueue(
    "references", "Create references file",
    lambda own, _: "\n\n".join(
        "\n".join(refs) for _hyp, refs in own["plans-out"].values()),
    ext="txt")

# The final stage hands a copy of the post-processed data to the parent.
TranslatePipeline.enqueue(
    "out", "Expose output for parent",
    lambda own, _: own["post-process"].copy())
Esempio n. 6
0
import json

from data.WebNLG.reader import WebNLGDataReader
from data.reader import DataSetType
from utils.error_bar import error_bar
from utils.pipeline import Pipeline, ParallelPipeline

# Corpus pre-processing. Each stage works on a copy of the previous stage's
# output.
# NOTE(review): judging from usage, a stage callable's first argument holds
# this pipeline's own values and its second holds values supplied from
# outside -- confirm against ``Pipeline``.
CorpusPreProcessPipeline = Pipeline()

# Look up the reader registered for this data set type and call it with that
# same type.
CorpusPreProcessPipeline.enqueue(
    "corpus", "Read Corpus",
    lambda own, parent: parent["config"].reader[own["set"]](own["set"]))
CorpusPreProcessPipeline.enqueue(
    "graphify", "RDF to Graph",
    lambda own, _: own["corpus"].copy().generate_graphs())
CorpusPreProcessPipeline.enqueue(
    "spelling", "Fix Spelling",
    lambda own, _: own["graphify"].copy().fix_spelling())
CorpusPreProcessPipeline.enqueue(
    "entities", "Describe entities",
    lambda own, _: own["spelling"].copy().describe_entities())

# Test does not need matching entities or plans, so the test pipeline is
# derived here, before those stages are enqueued.
# NOTE(review): this relies on ``mutate`` capturing only the stages enqueued
# so far -- confirm against ``Pipeline.mutate``.
TestCorpusPreProcessPipeline = CorpusPreProcessPipeline.mutate({})

CorpusPreProcessPipeline.enqueue(
    "match-ents", "Match Entities",
    lambda own, _: own["entities"].copy().match_entities())
CorpusPreProcessPipeline.enqueue(
    "match-plans", "Match Plans",
    lambda own, _: own["match-ents"].copy().match_plans())
CorpusPreProcessPipeline.enqueue(
Esempio n. 7
0
from utils.pipeline import Pipeline

# Evaluation pipeline: evaluate the externally supplied "translate" value,
# then expose a copy of the result under the key "out".
EvaluationPipeline = Pipeline()

EvaluationPipeline.enqueue(
    "evaluate",
    "Evaluate test reader",
    lambda _, parent: parent["translate"].evaluate(),
)
EvaluationPipeline.enqueue(
    "out",
    "Expose output for parent",
    lambda own, _: own["evaluate"].copy(),
)
Esempio n. 8
0
from utils.pipeline import Pipeline

# Planner training: call ``learn`` on the configured planner with the
# pre-processed train and dev data, then expose the result under "out".
TrainPlannerPipeline = Pipeline()
TrainPlannerPipeline.enqueue(
    "planner", "Learn planner",
    lambda _, parent: parent["config"].planner.learn(
        parent["pre-process"]["train"], parent["pre-process"]["dev"]))
TrainPlannerPipeline.enqueue(
    "out", "Expose the planner",
    lambda own, _: own["planner"])
Esempio n. 9
0
from scorer.global_direction import GlobalDirectionExpert
from scorer.product_of_experts import WeightedProductOfExperts
from scorer.relation_direction import RelationDirectionExpert
from scorer.relation_transitions import RelationTransitionsExpert
from scorer.splitting_tendencies import SplittingTendenciesExpert
from utils.pipeline import Pipeline

# Naive planner built on a weighted product of the four expert classes below
# (the classes themselves are passed, not instances).
naive_planner = NaivePlanner(WeightedProductOfExperts([
    RelationDirectionExpert,
    GlobalDirectionExpert,
    SplittingTendenciesExpert,
    RelationTransitionsExpert,
]))

# Neural planner, constructed with no arguments.
neural_planner = NeuralPlanner()

# Per-planner pipeline: train the planner, then pre-process the test data.
PlanPipeline = Pipeline()
PlanPipeline.enqueue(
    "train-planner",
    "Train Planner",
    TrainPlannerPipeline,
)
PlanPipeline.enqueue(
    "test-corpus",
    "Pre-process test data",
    TestingPreProcessPipeline,
)

# Experiments pipeline: the training data is pre-processed first, followed by
# one planner sub-pipeline per planner.
ExperimentsPipeline = Pipeline()
ExperimentsPipeline.enqueue(
    "pre-process",
    "Pre-process training data",
    TrainingPreProcessPipeline,
)

# Train all planners
# # Naive Planner
# ``PlanPipeline`` is mutated with a "config" that pairs the WebNLG reader
# with the naive planner.
ExperimentsPipeline.enqueue(
    "naive-planner",
    "Train Naive Planner",
    PlanPipeline.mutate({
        "config": Config(reader=WebNLGDataReader, planner=naive_planner),
    }),
)
# # Neural Planner
ExperimentsPipeline.enqueue(
Esempio n. 10
0
from utils.pipeline import Pipeline

# REG pipeline: call the configured ``reg`` with the pre-processed train and
# dev data, then expose the result under "out".
REGPipeline = Pipeline()

# NOTE(review): the title "Learn planner" looks copied from the planner
# pipeline; this stage calls ``config.reg``. Left unchanged because it is a
# runtime string -- confirm how titles are used before renaming.
REGPipeline.enqueue(
    "reg", "Learn planner",
    lambda _, parent: parent["config"].reg(
        parent["pre-process"]["train"], parent["pre-process"]["dev"]))
REGPipeline.enqueue(
    "out", "Expose the reg",
    lambda own, _: own["reg"])
Esempio n. 11
0
from utils.pipeline import Pipeline


def unique_plans_outputs(reader):
    """Return one hypothesis per distinct plan in ``reader.data``.

    When several items share a plan, the hypothesis of the last one wins,
    while the plan keeps the position of its first occurrence. The number of
    distinct plans is printed to standard output as a side effect.

    :param reader: object whose ``data`` attribute is an iterable of items
        exposing ``plan`` and ``hyp`` attributes.
    :return: list of hypotheses, one per distinct plan.
    """
    hyp_by_plan = {}
    for datum in reader.data:
        plan = datum.plan
        hyp_by_plan[plan] = datum.hyp
    print(len(hyp_by_plan))
    return [*hyp_by_plan.values()]


# Translation pipeline: translate a copy of the test corpus, post-process the
# result, build the hypothesis text and expose the post-processed data.
# NOTE(review): judging from usage, a stage callable's first argument holds
# this pipeline's own earlier outputs and its second holds values supplied
# from outside -- confirm against ``Pipeline``.
TranslatePipeline = Pipeline()

TranslatePipeline.enqueue(
    "translate",
    "Translate all plans",
    lambda _, parent: parent["test-corpus"].copy().translate_plans(parent["train-model"]),
)
TranslatePipeline.enqueue(
    "post-process",
    "Post-process translated sentences",
    lambda own, _: own["translate"].copy().post_process(),
)
# One hypothesis per distinct plan, one per line.
TranslatePipeline.enqueue(
    "hypothesis",
    "Create hypothesis file",
    lambda own, _: "\n".join(unique_plans_outputs(own["post-process"])),
)
TranslatePipeline.enqueue(
    "out",
    "Expose output for parent",
    lambda own, _: own["post-process"].copy(),
)