Exemple #1
0
class NonStandardPhraseFF(FeatureFunction):
    """Feature built from the checker's 'NONSTANDARD_PHRASES' results.

    The value is the number of items the checker returns for the paraphrase
    divided by the number it returns for the source plus one.
    """

    # Created once at class-definition time and shared by all instances.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return a message when ``feature_value`` is positive, else ``""``."""
        return "The paraphrase has NONSTANDARD_PHRASES" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for a source/paraphrase pair.

        ``position`` is accepted but not used by this feature.
        """
        checker = self.spellcheker
        source_findings = checker.check(source, ['NONSTANDARD_PHRASES'])
        paraphrase_findings = checker.check(paraphrase, ['NONSTANDARD_PHRASES'])
        paraphrase_count = len(paraphrase_findings)
        # The +1 keeps the denominator non-zero when the source has no findings.
        return paraphrase_count / (len(source_findings) + 1)
Exemple #2
0
class PunctuationFF(FeatureFunction):
    """Feature built from the checker's 'TYPOGRAPHY' and 'PUNCTUATION' results.

    The value is the number of items the checker returns for the paraphrase
    divided by the number it returns for the source plus one.
    """

    # Created once at class-definition time and shared by all instances.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return a message when ``feature_value`` is positive, else ``""``."""
        return "The paraphrase has Punctuation errors" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for a source/paraphrase pair.

        ``position`` is accepted but not used by this feature.
        """
        checker = self.spellcheker
        source_findings = checker.check(source, ['TYPOGRAPHY', 'PUNCTUATION'])
        paraphrase_findings = checker.check(
            paraphrase, ['TYPOGRAPHY', 'PUNCTUATION'])
        paraphrase_count = len(paraphrase_findings)
        # The +1 keeps the denominator non-zero when the source has no findings.
        return paraphrase_count / (len(source_findings) + 1)
Exemple #3
0
class MiscErrorFF(FeatureFunction):
    """Feature built from the checker's 'STYLE' and 'MISC' results.

    The value is the number of items the checker returns for the paraphrase
    divided by the number it returns for the source plus one.
    """

    # Created once at class-definition time and shared by all instances.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return a message when ``feature_value`` is positive, else ``""``."""
        # NOTE(review): the message says "semantic errors" although extract()
        # queries the 'STYLE' and 'MISC' categories -- confirm the wording.
        return "The paraphrase has semantic errors" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for a source/paraphrase pair.

        ``position`` is accepted but not used by this feature.
        """
        checker = self.spellcheker
        source_findings = checker.check(source, ['STYLE', 'MISC'])
        paraphrase_findings = checker.check(paraphrase, ['STYLE', 'MISC'])
        paraphrase_count = len(paraphrase_findings)
        # The +1 keeps the denominator non-zero when the source has no findings.
        return paraphrase_count / (len(source_findings) + 1)
Exemple #4
0
class ConfusedWordsFF(FeatureFunction):
    """Feature built from the checker's 'CONFUSED_WORDS' results.

    The value is the number of items the checker returns for the paraphrase
    divided by the number it returns for the source plus one.
    """

    # Created once at class-definition time and shared by all instances.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return a message when ``feature_value`` is positive, else ``""``."""
        return "The paraphrase has CONFUSED_WORDS" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for a source/paraphrase pair.

        ``position`` is accepted but not used by this feature.
        """
        checker = self.spellcheker
        source_findings = checker.check(source, ['CONFUSED_WORDS'])
        paraphrase_findings = checker.check(paraphrase, ['CONFUSED_WORDS'])
        paraphrase_count = len(paraphrase_findings)
        # The +1 keeps the denominator non-zero when the source has no findings.
        return paraphrase_count / (len(source_findings) + 1)
Exemple #5
0
class CollocationFF(FeatureFunction):
    """Feature built from the checker's 'COLLOCATIONS' results.

    The value is the number of items the checker returns for the paraphrase
    divided by the number it returns for the source plus one.
    """

    # Created once at class-definition time and shared by all instances.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return a message when ``feature_value`` is positive, else ``""``."""
        return "Use correct collocations" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for a source/paraphrase pair.

        ``position`` is accepted but not used by this feature.
        """
        checker = self.spellcheker
        source_findings = checker.check(source, ['COLLOCATIONS'])
        paraphrase_findings = checker.check(paraphrase, ['COLLOCATIONS'])
        paraphrase_count = len(paraphrase_findings)
        # The +1 keeps the denominator non-zero when the source has no findings.
        return paraphrase_count / (len(source_findings) + 1)
Exemple #6
0
class GrammarFF(FeatureFunction):
    """Feature built from the checker's 'GRAMMAR' results.

    The value is the number of items the checker returns for the paraphrase
    divided by the number it returns for the source plus one.
    """

    # Created once at class-definition time and shared by all instances.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return a message when ``feature_value`` is positive, else ``""``."""
        return (
            "The paraphrase should not have grammatical errors"
            if feature_value > 0
            else ""
        )

    def extract(self, source, paraphrase, position):
        """Compute the feature value for a source/paraphrase pair.

        ``position`` is accepted but not used by this feature.
        """
        checker = self.spellcheker
        source_findings = checker.check(source, ['GRAMMAR'])
        paraphrase_findings = checker.check(paraphrase, ['GRAMMAR'])
        paraphrase_count = len(paraphrase_findings)
        # The +1 keeps the denominator non-zero when the source has no findings.
        return paraphrase_count / (len(source_findings) + 1)
Exemple #7
0
class SpellingFF(FeatureFunction):
    """Feature built from the checker's 'TYPOS' results.

    Both texts are checked with the 'I_LOWERCASE' id excluded. The value is
    the number of items returned for the paraphrase divided by the number
    returned for the source plus 0.1.
    """

    # Created once at class-definition time and shared by all instances.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return a message when ``feature_value`` is positive, else ``""``."""
        return (
            "The paraphrase should not have spelling errors"
            if feature_value > 0
            else ""
        )

    def extract(self, source, paraphrase, position):
        """Compute the feature value for a source/paraphrase pair.

        ``position`` is accepted but not used by this feature.
        """
        checker = self.spellcheker
        source_findings = checker.check(
            source, ['TYPOS'], excludes_ids={'I_LOWERCASE'})
        paraphrase_findings = checker.check(
            paraphrase, ['TYPOS'], excludes_ids={'I_LOWERCASE'})
        paraphrase_count = len(paraphrase_findings)
        # NOTE(review): the smoothing term here is 0.1, not 1 -- confirm
        # whether this is deliberate or a typo before changing it, since it
        # scales the feature value.
        return paraphrase_count / (len(source_findings) + 0.1)
Exemple #8
0
from utils.preprocess import tokenize, syllables, pos_tag, remove_marks
from utils.service import sentence2vec_client as sentence2vec
# from utils.service.ginger import correct
from utils.service.language_tool import SpellChecker
from utils.service.sts import StsSimilarity
from utils.service.word2vec_client import n_similarity, wm_distance
from utils.text import lcs
from langid.langid import LanguageIdentifier, model

# Module-level service objects, all created as a side effect of importing
# this module.

# Language identifier built from the model string shipped with langid;
# norm_probs=True presumably normalises the reported confidence -- confirm
# against the langid documentation.
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
# NOTE(review): `jamspell` is not imported in the visible part of this file;
# confirm the import exists elsewhere.
corrector = jamspell.TSpellCorrector()
# NOTE(review): absolute, machine-specific path -- importing this module on
# another machine depends on this file existing. The result of
# LoadLangModel() is not inspected here.
corrector.LoadLangModel(
    '/media/may/Data/LinuxFiles/PycharmProjects/PhD/paraphrasing-data/en.spell.bin'
)
sts = StsSimilarity()
# Shared checker instance (the name keeps its original spelling).
spellcheker = SpellChecker()

# ginger_map = pickle.load(
#     open("/media/may/Data/LinuxFiles/PycharmProjects/PhD/paraphrasing-data/ginger_correction_map.pickle", 'rb'))
#
# newmap = {}
# for k in ginger_map:
#     kn = remove_marks(k)
#     newmap[kn] = ginger_map[k]
# ginger_map = newmap


def do_not_preprocess():
    def wrapper(f):
        f.do_not_preprocess = True
        return f