def dramatize(s):
    """Return *s* with each polar adjective swapped for a more extreme one.

    dramatize("This code is nice") => "This code is legendary"
    """
    out = []
    # parsetree() splits the string into sentences; each sentence yields
    # Word objects with attributes such as Word.tag and Word.string.
    for sentence in parsetree(s):
        for word in sentence:
            substitute = None
            if word.tag == "JJ":
                # Polarity of this adjective; 0.0 means a neutral adjective
                # like "last" or "political", which we leave untouched.
                polarity = sentiment(word.string)[0]
                if polarity != 0.0:
                    # Scan the adjective dictionary in random order
                    # (shuffled() returns a new, randomly ordered list)
                    # for one whose polarity is at least 0.2 more extreme.
                    for candidate, score in shuffled(adjectives.items()):
                        more_positive = polarity >= 0 and score > polarity + 0.2
                        more_negative = polarity < 0 and score < polarity - 0.2
                        if more_positive or more_negative:
                            substitute = candidate.lower()
                            break
            # Fall back to the original word when no replacement was found.
            out.append(substitute if substitute is not None else word.string)
    return " ".join(out)
def test_shuffled(self):
    """shuffled() must return a reordered copy holding the same elements."""
    original = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    mixed = vector.shuffled(original)
    # The order changes, but sorting the copy recovers the input,
    # i.e. shuffled() <=> sorted().
    self.assertTrue(original != mixed and sorted(mixed) == original)
    print("pattern.vector.shuffled()")
def test_shuffled(self):
    # Assert shuffled() <=> sorted(): it returns a new, randomly
    # reordered list that still contains exactly the same elements.
    v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    v2 = vector.shuffled(v1)
    self.assertTrue(v1 != v2 and v1 == sorted(v2))
    # Parenthesized single-argument print is valid under both Python 2
    # and Python 3; the bare "print ..." statement is a SyntaxError on 3.
    print("pattern.vector.shuffled()")
# it will check if it could correctly predict this example.
# If not, it will adjust its weights.
# So the accuracy of the perceptron can be improved significantly
# by training in multiple iterations, averaging out all weights.
# This will take several minutes.
# If you want it to run faster for experimentation,
# use less iterations or less data in the code below:

print("training model...")
seed(0)  # Lock random list shuffling so we can compare.
m = Model(known=known, unknown=unknown, classifier=SLP())
# Multiple passes over the (re-shuffled) training sentences.
for _ in range(5):
    for s in shuffled(data[:20000]):
        # Train each word together with its left and right neighbor.
        # Named prev_word / next_word so the next() builtin is not shadowed.
        prev_word = None
        next_word = None
        for i, (w, tag) in enumerate(s):
            if i < len(s) - 1:
                next_word = s[i + 1]
            m.train(w, tag, prev_word, next_word)
            prev_word = (w, tag)
            next_word = None  # Last word of a sentence has no successor.
f = os.path.join(os.path.dirname(__file__), "en-model.slp")
m.save(f, final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# assumes that a lexicon of known words and their most frequent tag is available,
# along with some rules for morphology (suffixes, e.g., -ly = adverb)
# so you can still follow the rest of the example. classifier = SVM() # We'll build a classifier to predict sentiment in Dutch movie reviews. # For example, "geweldige film!" (great movie) indicates a positive sentiment. # The CSV file at pattern/test/corpora/polarity-nl-bol.com.csv # contains 1,500 positive and 1,500 negative reviews. # The pattern.vector module has a shuffled() function # which we use to randomly arrange the reviews in the list: print("loading data...") data = os.path.join(os.path.dirname(__file__), "..", "..", "test", "corpora", "polarity-nl-bol.com.csv") data = Datasheet.load(data) data = shuffled(data) # We do not necessarily need Document objects as in the previous examples. # We can train any classifier on simple Python dictionaries too. # This is sometimes easier if you want full control over the data. # The instance() function below returns a train/test instance for a given review: # 1) parse the review for part-of-speech tags, # 2) keep adjectives, adverbs and exclamation marks (these mainly carry sentiment), # 3) lemmatize the Dutch adjectives, e.g., "goede" => "goed" (good). # 4) count the distinct words in the list, map it to a dictionary. def instance(review): # "Great book!" v = tag(review) # [("Great", "JJ"), ("book", "NN"), ("!", "!")] v = [word for (word, pos) in v if pos in ("JJ", "RB") or word in ("!")] v = [predicative(word) for word in v] # ["great", "!", "!"]
# so you can still follow the rest of the example. classifier = SVM() # We'll build a classifier to predict sentiment in Dutch movie reviews. # For example, "geweldige film!" (great movie) indicates a positive sentiment. # The CSV file at pattern/test/corpora/polarity-nl-bol.com.csv # contains 1,500 positive and 1,500 negative reviews. # The pattern.vector module has a shuffled() function # which we use to randomly arrange the reviews in the list: print "loading data..." data = Datasheet.load( os.path.join("..", "..", "test", "corpora", "polarity-nl-bol.com.csv")) data = shuffled(data) # We do not necessarily need Document objects as in the previous examples. # We can train any classifier on simple Python dictionaries too. # This is sometimes easier if you want full control over the data. # The instance() function below returns a train/test instance for a given review: # 1) parse the review for part-of-speech tags, # 2) keep adjectives, adverbs and exclamation marks (these mainly carry sentiment), # 3) lemmatize the Dutch adjectives, e.g., "goede" => "goed" (good). # 4) count the distinct words in the list, map it to a dictionary. def instance(review): # "Great book!" v = tag(review) # [("Great", "JJ"), ("book", "NN"), ("!", "!")] v = [word for (word, pos) in v if pos in ("JJ", "RB") or word in ("!")] v = [predicative(word) for word in v] # ["great", "!", "!"]