Exemple #1
0
def segmentation_pipeline(p_mergehyphen, p_splitcomma, p_split, p_punctadd):
    """Assemble the segmentation pipeline.

    Modules are registered on a fresh ``Pipeline`` in this order:

    1. ``MergeWordHyphenModule(p_mergehyphen)``
    2. ``SplitWithCommaModule(p_splitcomma, ",")``
    3. ``SplitModuleGenerator(p_split)``
    4. ``AddPunctuationModule(p_punctadd, ".")``
    5. ``AddPunctuationModule(p_punctadd / 2, ",")``
    6. ``AddPunctuationModule(p_punctadd / 2, "'")``

    The ``p_*`` arguments are forwarded unchanged to the module
    constructors (apart from the halving above); presumably they are
    probabilities -- confirm against the module definitions.

    Returns:
        The populated ``Pipeline`` instance.
    """
    pipeline = Pipeline()
    add = pipeline.addModule

    add(MergeWordHyphenModule(p_mergehyphen))

    # NOTE: it might be possible to merge the next two modules into one.
    add(SplitWithCommaModule(p_splitcomma, ","))
    add(SplitModuleGenerator(p_split))

    # "." is added with the full p_punctadd; "," and "'" each get half of it.
    add(AddPunctuationModule(p_punctadd, "."))
    for mark in (",", "'"):
        add(AddPunctuationModule(p_punctadd / 2, mark))

    return pipeline
Exemple #2
0
def token_pipeline(p_charsub, sub_data):
    """Assemble the token-level pipeline.

    Creates a fresh ``Pipeline`` holding a single
    ``CharsSubModule(sub_data, p_charsub)``. Note that the module's
    constructor receives ``sub_data`` first and ``p_charsub`` second,
    the reverse of this function's parameter order.

    Returns:
        The populated ``Pipeline`` instance.
    """
    pipeline = Pipeline()
    substitution = CharsSubModule(sub_data, p_charsub)
    pipeline.addModule(substitution)
    return pipeline