def instance(review):
    """Turn a review text into a count of its descriptive words.

    The review is tagged with tag(), which is iterated as (word, pos)
    pairs. Only words tagged "JJ" or "RB", plus the literal token "!",
    are kept. Each kept word is passed through predicative() and the
    resulting list is handed to count().

    Example (taken from the original inline notes):
        "Great book!" -> tagged as
        [("Great", "JJ"), ("book", "NN"), ("!", "!")]
        -> presumably counted as {"great": 1, "!": 1}; the exact result
        depends on predicative() and count() -- confirm against them.

    review: the text to process (passed unchanged to tag()).
    Returns whatever count() returns for the filtered word list.
    """
    tagged = tag(review)
    # Compare against "!" directly: `word in ("!")` is a substring test on
    # the string "!" (the parentheses do not make a tuple), which would
    # also let an empty-string token through.
    kept = [word for (word, pos) in tagged
            if pos in ("JJ", "RB") or word == "!"]
    kept = [predicative(word) for word in kept]
    return count(kept)
def wordvarieties(word):
    """Return five derived forms of *word* as a list.

    The list holds, in this order, the results of lemma(word),
    predicative(word), attributive(word), singularize(word) and
    conjugate(word, PRESENT, 1, SG).
    """
    return [
        lemma(word),
        predicative(word),
        attributive(word),
        singularize(word),
        conjugate(word, PRESENT, 1, SG),
    ]
def test_predicative(self):
    """Check the accuracy of nl.predicative() ("felle" => "fel").

    Every row of the wordforms-nl-celex.csv corpus is unpacked as
    (pred, attr, sg, pl); the test passes when nl.predicative(attr)
    equals pred for more than 96% of the rows.
    """
    from pattern.db import Datasheet
    corpus = os.path.join(PATH, "corpora", "wordforms-nl-celex.csv")
    # One True/False outcome per corpus row.
    outcomes = [nl.predicative(attr) == pred
                for pred, attr, sg, pl in Datasheet.load(corpus)]
    accuracy = float(sum(outcomes)) / len(outcomes)
    self.assertTrue(accuracy > 0.96)
    print("pattern.nl.predicative()")
def test_predicative(self):
    """Assert the accuracy of the predicative algorithm ("felle" => "fel").

    Each row of the wordforms-nl-celex.csv corpus is unpacked as
    (pred, attr, sg, pl). The test counts the rows for which
    nl.predicative(attr) == pred and requires that fraction to be
    greater than 0.96.
    """
    from pattern.db import Datasheet
    i, n = 0, 0
    for pred, attr, sg, pl in Datasheet.load(os.path.join(PATH, "corpora", "wordforms-nl-celex.csv")):
        if nl.predicative(attr) == pred:
            i += 1
        n += 1
    self.assertTrue(float(i) / n > 0.96)
    # Call form of print: valid on Python 3 and, with a single argument,
    # prints the same text on Python 2.
    print("pattern.nl.predicative()")
def prepare_text_nl(row):
    """
    Prepares dutch text by rewriting each tagged word of the message:
        * Modal ('MD') and verb tags (any tag containing 'VB'):
          lemmatize the word
        * Plural nouns ('NNS'): singularize the word
        * Adjectives (any tag containing 'JJ'): predicative form
        * Any other tag: the word is kept unchanged

    Parameters:
    -----------
    row : object with a ``Message_Only_Text`` attribute
        Presumably a row of a pandas dataframe -- confirm against caller.

    Returns:
    --------
    new_message : str
        The rewritten words, each followed by a single space (a
        non-empty result therefore ends with a trailing space).

    Raises:
    -------
    Exception
        Any error raised while parsing/splitting the text is re-raised
        after the offending text has been printed.
    """
    text = row.Message_Only_Text
    try:
        message = split(parse(text))
    except Exception:
        # Print the text that could not be parsed, then re-raise so the
        # caller sees the real parse error instead of failing further
        # down on an unbound `message`.
        print(text)
        raise

    pieces = []
    for sentence in message:
        for word, tag in sentence.tagged:
            if tag == 'MD' or 'VB' in tag:
                word = lemma(word)
            elif tag == 'NNS':
                word = singularize(word)
            elif 'JJ' in tag:
                word = predicative(word)
            pieces.append(word + ' ')
    return ''.join(pieces)