class NonStandardPhraseFF(FeatureFunction):
    """Feature comparing NONSTANDARD_PHRASES checker results for a paraphrase and its source."""

    # Created once when the class body executes; every instance shares it.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return the feedback message for a positive feature value, else an empty string."""
        return "The paraphrase has NONSTANDARD_PHRASES" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for one source/paraphrase pair.

        The checker is queried for the source first and then for the
        paraphrase, both restricted to the ``NONSTANDARD_PHRASES`` category.
        The result is the number of items returned for the paraphrase divided
        by one plus the number returned for the source, so the denominator is
        never zero.  ``position`` is accepted but not used.
        """
        check = self.spellcheker.check
        source_matches = check(source, ['NONSTANDARD_PHRASES'])
        paraphrase_matches = check(paraphrase, ['NONSTANDARD_PHRASES'])
        baseline = len(source_matches) + 1
        return len(paraphrase_matches) / baseline
class PunctuationFF(FeatureFunction):
    """Feature comparing TYPOGRAPHY/PUNCTUATION checker results for a paraphrase and its source."""

    # Created once when the class body executes; every instance shares it.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return the feedback message for a positive feature value, else an empty string."""
        return "The paraphrase has Punctuation errors" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for one source/paraphrase pair.

        The checker is queried for the source first and then for the
        paraphrase, both restricted to the ``TYPOGRAPHY`` and ``PUNCTUATION``
        categories.  The result is the number of items returned for the
        paraphrase divided by one plus the number returned for the source, so
        the denominator is never zero.  ``position`` is accepted but not used.
        """
        check = self.spellcheker.check
        source_matches = check(source, ['TYPOGRAPHY', 'PUNCTUATION'])
        paraphrase_matches = check(paraphrase, ['TYPOGRAPHY', 'PUNCTUATION'])
        baseline = len(source_matches) + 1
        return len(paraphrase_matches) / baseline
class MiscErrorFF(FeatureFunction):
    """Feature comparing STYLE/MISC checker results for a paraphrase and its source."""

    # Created once when the class body executes; every instance shares it.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return the feedback message for a positive feature value, else an empty string."""
        return "The paraphrase has semantic errors" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for one source/paraphrase pair.

        The checker is queried for the source first and then for the
        paraphrase, both restricted to the ``STYLE`` and ``MISC`` categories.
        The result is the number of items returned for the paraphrase divided
        by one plus the number returned for the source, so the denominator is
        never zero.  ``position`` is accepted but not used.
        """
        check = self.spellcheker.check
        source_matches = check(source, ['STYLE', 'MISC'])
        paraphrase_matches = check(paraphrase, ['STYLE', 'MISC'])
        baseline = len(source_matches) + 1
        return len(paraphrase_matches) / baseline
class ConfusedWordsFF(FeatureFunction):
    """Feature comparing CONFUSED_WORDS checker results for a paraphrase and its source."""

    # Created once when the class body executes; every instance shares it.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return the feedback message for a positive feature value, else an empty string."""
        return "The paraphrase has CONFUSED_WORDS" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for one source/paraphrase pair.

        The checker is queried for the source first and then for the
        paraphrase, both restricted to the ``CONFUSED_WORDS`` category.  The
        result is the number of items returned for the paraphrase divided by
        one plus the number returned for the source, so the denominator is
        never zero.  ``position`` is accepted but not used.
        """
        check = self.spellcheker.check
        source_matches = check(source, ['CONFUSED_WORDS'])
        paraphrase_matches = check(paraphrase, ['CONFUSED_WORDS'])
        baseline = len(source_matches) + 1
        return len(paraphrase_matches) / baseline
class CollocationFF(FeatureFunction):
    """Feature comparing COLLOCATIONS checker results for a paraphrase and its source."""

    # Created once when the class body executes; every instance shares it.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return the feedback message for a positive feature value, else an empty string."""
        return "Use correct collocations" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for one source/paraphrase pair.

        The checker is queried for the source first and then for the
        paraphrase, both restricted to the ``COLLOCATIONS`` category.  The
        result is the number of items returned for the paraphrase divided by
        one plus the number returned for the source, so the denominator is
        never zero.  ``position`` is accepted but not used.
        """
        check = self.spellcheker.check
        source_matches = check(source, ['COLLOCATIONS'])
        paraphrase_matches = check(paraphrase, ['COLLOCATIONS'])
        baseline = len(source_matches) + 1
        return len(paraphrase_matches) / baseline
class GrammarFF(FeatureFunction):
    """Feature comparing GRAMMAR checker results for a paraphrase and its source."""

    # Created once when the class body executes; every instance shares it.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return the feedback message for a positive feature value, else an empty string."""
        return "The paraphrase should not have grammatical errors" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for one source/paraphrase pair.

        The checker is queried for the source first and then for the
        paraphrase, both restricted to the ``GRAMMAR`` category.  The result
        is the number of items returned for the paraphrase divided by one plus
        the number returned for the source, so the denominator is never zero.
        ``position`` is accepted but not used.
        """
        check = self.spellcheker.check
        source_matches = check(source, ['GRAMMAR'])
        paraphrase_matches = check(paraphrase, ['GRAMMAR'])
        baseline = len(source_matches) + 1
        return len(paraphrase_matches) / baseline
class SpellingFF(FeatureFunction):
    """Feature comparing TYPOS checker results for a paraphrase and its source."""

    # Created once when the class body executes; every instance shares it.
    spellcheker = SpellChecker()

    def explain(self, feature_value):
        """Return the feedback message for a positive feature value, else an empty string."""
        return "The paraphrase should not have spelling errors" if feature_value > 0 else ""

    def extract(self, source, paraphrase, position):
        """Compute the feature value for one source/paraphrase pair.

        The checker is queried for the source first and then for the
        paraphrase, both restricted to the ``TYPOS`` category and with
        ``I_LOWERCASE`` passed as ``excludes_ids``.  The result is the number
        of items returned for the paraphrase divided by the number returned
        for the source plus 0.1.  ``position`` is accepted but not used.
        """
        check = self.spellcheker.check
        source_matches = check(source, ['TYPOS'], excludes_ids={'I_LOWERCASE'})
        paraphrase_matches = check(paraphrase, ['TYPOS'], excludes_ids={'I_LOWERCASE'})
        # NOTE(review): the offset here is 0.1, not 1 -- confirm this is
        # intentional before changing it, since it scales the feature value.
        baseline = len(source_matches) + 0.1
        return len(paraphrase_matches) / baseline
from utils.preprocess import tokenize, syllables, pos_tag, remove_marks from utils.service import sentence2vec_client as sentence2vec # from utils.service.ginger import correct from utils.service.language_tool import SpellChecker from utils.service.sts import StsSimilarity from utils.service.word2vec_client import n_similarity, wm_distance from utils.text import lcs from langid.langid import LanguageIdentifier, model identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True) corrector = jamspell.TSpellCorrector() corrector.LoadLangModel( '/media/may/Data/LinuxFiles/PycharmProjects/PhD/paraphrasing-data/en.spell.bin' ) sts = StsSimilarity() spellcheker = SpellChecker() # ginger_map = pickle.load( # open("/media/may/Data/LinuxFiles/PycharmProjects/PhD/paraphrasing-data/ginger_correction_map.pickle", 'rb')) # # newmap = {} # for k in ginger_map: # kn = remove_marks(k) # newmap[kn] = ginger_map[k] # ginger_map = newmap def do_not_preprocess(): def wrapper(f): f.do_not_preprocess = True return f