Beispiel #1
0
# Corpus pre-processing stages shared by the training and test variants.
# Every stage after the first reads the entry stored under the previous
# stage's key in `f` (presumably that stage's result), copies it, and applies
# one transformation to the copy.
CorpusPreProcessPipeline = Pipeline()

# Index the configured reader by the requested set, then call the selected
# entry with that same set value.
CorpusPreProcessPipeline.enqueue(
    "corpus",
    "Read Corpus",
    lambda f, x: x["config"].reader[f["set"]](f["set"]),
)

CorpusPreProcessPipeline.enqueue(
    "graphify",
    "RDF to Graph",
    lambda f, _: f["corpus"].copy().generate_graphs(),
)

CorpusPreProcessPipeline.enqueue(
    "spelling",
    "Fix Spelling",
    lambda f, _: f["graphify"].copy().fix_spelling(),
)

CorpusPreProcessPipeline.enqueue(
    "entities",
    "Describe entities",
    lambda f, _: f["spelling"].copy().describe_entities(),
)

# Test does not need matching entities or plans, so the test variant is
# derived here, before any further stages are enqueued.
# NOTE(review): assumes mutate() returns an independent pipeline that is not
# affected by later enqueue() calls on the original -- confirm.
TestCorpusPreProcessPipeline = CorpusPreProcessPipeline.mutate({})

# Further stages appended to CorpusPreProcessPipeline: entity matching, plan
# matching, tokenization and a JSON export. Each one copies the entry stored
# under the previous stage's key before transforming it.
CorpusPreProcessPipeline.enqueue(
    "match-ents",
    "Match Entities",
    lambda f, _: f["entities"].copy().match_entities(),
)

CorpusPreProcessPipeline.enqueue(
    "match-plans",
    "Match Plans",
    lambda f, _: f["entities" if False else "match-ents"].copy().match_plans(),
)

# Tokenization is two chained calls on the same copy: plans first, then the
# delexicalized text.
CorpusPreProcessPipeline.enqueue(
    "tokenize",
    "Tokenize Plans & Sentences",
    lambda f, _: (
        f["match-plans"].copy().tokenize_plans().tokenize_delex()
    ),
)

# Serialize the exported corpus to a JSON string; the stage is registered
# with ext="json".
CorpusPreProcessPipeline.enqueue(
    "to-json",
    "Export in a readable format",
    lambda f, _: json.dumps(f["tokenize"].export()),
    ext="json",
)
Beispiel #2
0
# Register the three sub-pipelines on MainPipeline in this order:
# test-data pre-processing, translation, evaluation.
MainPipeline.enqueue(
    "test-corpus",
    "Pre-process test data",
    TestingPreProcessPipeline,
)
MainPipeline.enqueue(
    "translate",
    "Translate Test",
    TranslatePipeline,
)
MainPipeline.enqueue(
    "evaluate",
    "Evaluate Translations",
    EvaluationPipeline,
)

if __name__ == "__main__":
    # Experts handed to WeightedProductOfExperts, which backs the naive planner.
    experts = [
        RelationDirectionExpert,
        GlobalDirectionExpert,
        SplittingTendenciesExpert,
        RelationTransitionsExpert,
    ]
    naive_planner = NaivePlanner(WeightedProductOfExperts(experts))

    # Alternative planners, currently disabled:
    # neural_planner = NeuralPlanner()
    # combined_planner = CombinedPlanner((neural_planner, naive_planner))
    config = Config(reader=WebNLGDataReader, planner=naive_planner)

    # Bind the configuration to the main pipeline, then run it on WebNLG
    # using "WebNLG" as the cache name.
    configured_pipeline = MainPipeline.mutate({"config": config})
    results = configured_pipeline.execute("WebNLG", cache_name="WebNLG")

    print()

    # Show one randomly chosen item from the translation stage's data,
    # with its reference text printed after the translation.
    sample = random.choice(results["translate"].data)
    print("Random Sample:")
    print("Graph:", sample.graph.as_rdf())
    print("Plan:", sample.plan)
    print("Translation:", sample.hyp)
    print("Reference:  ", sample.text)

    print()

    # The evaluation stage's result is reported under the "BLEU" label.
    print("BLEU", results["evaluate"])
Beispiel #3
0
# Planner instance used by the "neural-planner" experiment stage.
neural_planner = NeuralPlanner()

# Per-planner sub-pipeline: train the planner, then pre-process the test data.
PlanPipeline = Pipeline()
PlanPipeline.enqueue(
    "train-planner",
    "Train Planner",
    TrainPlannerPipeline,
)
PlanPipeline.enqueue(
    "test-corpus",
    "Pre-process test data",
    TestingPreProcessPipeline,
)

# Top-level experiments pipeline; the training data is pre-processed first.
ExperimentsPipeline = Pipeline()
ExperimentsPipeline.enqueue(
    "pre-process",
    "Pre-process training data",
    TrainingPreProcessPipeline,
)

# Train all planners: one PlanPipeline variant per planner, each bound to its
# own Config.
#
# Naive planner.
# NOTE(review): naive_planner is not defined in the visible part of this
# module -- confirm it is created before this point.
ExperimentsPipeline.enqueue(
    "naive-planner",
    "Train Naive Planner",
    PlanPipeline.mutate(
        {"config": Config(reader=WebNLGDataReader, planner=naive_planner)}
    ),
)

# Neural planner.
ExperimentsPipeline.enqueue(
    "neural-planner",
    "Train Neural Planner",
    PlanPipeline.mutate(
        {"config": Config(reader=WebNLGDataReader, planner=neural_planner)}
    ),
)

# Referring expression generators (REG): one REGPipeline variant per
# generator, each bound to its own Config.
#
# Naive REG.
ExperimentsPipeline.enqueue(
    "naive-reg",
    "Train Naive Referring Expressions Generator",
    REGPipeline.mutate({"config": Config(reg=NaiveREG)}),
)

# BERT REG.
ExperimentsPipeline.enqueue(
    "bert-reg",
    "Train BERT Referring Expressions Generator",
    REGPipeline.mutate({"config": Config(reg=BertREG)}),
)