from pattern.web import Twitter, hashtags
from pattern.table import Table, pprint

# Reload previously saved results if available, so the file grows with
# each run; the index of known IDs makes duplicate checks fast.
# (Table.load() is assumed here; otherwise we start from scratch.)
try:
    table = Table.load("cool.txt")
    index = dict.fromkeys(table.columns[0], True)
except:
    table = Table()
    index = {}

engine = Twitter()

# With cached=False, a live request is sent to Twitter,
# so we get the latest results for the query instead of those in the local cache.
for tweet in engine.search("is cooler than", count=25, cached=False):
    print tweet.description
    print tweet.author
    print tweet.date
    print hashtags(tweet.description) # Keywords in tweets start with a #.
    print
    # Create a unique ID based on the tweet content and author.
    id = hash(tweet.author + tweet.description)
    # Only add the tweet to the table if it doesn't already contain this ID.
    if len(table) == 0 or id not in index:
        table.append([id, tweet.description])
        index[id] = True

table.save("cool.txt")

print "Total results:", len(table)
print

# Print all the rows in the table.
# Since it is stored as a file it can grow comfortably each time the script runs.
# We can also open the table later on, in other scripts, for further analysis.
#pprint(table)

# Note: you can also search tweets by author:
# Twitter().search("from:tom_de_smedt")
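# Added sketch (not in the original): reopening the saved table in a
# separate script for further analysis, as the comments above suggest.
# Assumes Table.load() as used in the example; "cool.txt" is the file
# saved there.
from pattern.table import Table, pprint
table = Table.load("cool.txt")
pprint(table)
print len(table), "tweets on file."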
# -*- coding: utf-8 -*-
from pattern.web import Twitter, plaintext
from pattern.table import Table

t = Table()
# Mine tweets for each French film title, tagged with its category.
for nomme, categorie in (
  ("l'arnacoeur", "film"),
  ("le nom des gens", "film"),
  ("the ghost writer", "film"),
  ("tournée", "film"),
  ("des hommes et des dieux", "film"),
  ("gainsbourg, vie héroique", "film"),
  ("mammuth", "film")):
    for tweet in Twitter().search(nomme):
        s = plaintext(tweet.description)
        t.append([nomme, categorie, tweet.date, s])
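# Added sketch (not in the original): save the collected tweets and
# tally the number of tweets per title. Assumes the table iterates
# over its rows as lists; "films.txt" is a hypothetical filename.
t.save("films.txt")
counts = {}
for row in t:
    counts[row[0]] = counts.get(row[0], 0) + 1
for title in counts:
    print title, counts[title]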
# "X IS MORE IMPORTANT THAN Y" # Here is a rough example of how to build a web miner. # It mines comparative statements from Yahoo! and stores the results in a table, # which can be saved as a text file for further processing later on. # Pattern matching also works with Sentence objects from the MBSP module. # MBSP's parser is much more robust (but also slower). #from MBSP import Sentence, parse q = '"more important than"' # Yahoo search query p = "NP (VP) more important than NP" # Search pattern. p = Pattern.fromstring(p) t = Table() engine = Yahoo(license=None) for i in range(1): # max=10 for result in engine.search(q, start=i+1, count=100, cached=True): s = result.description s = plaintext(s) s = Sentence(parse(s)) for m in p.search(s): a = m.constituents(constraint=0)[-1] # Left NP. b = m.constituents(constraint=5)[ 0] # Right NP. t.append(( a.string.lower(), b.string.lower())) pprint(t) print print len(t), "results."
from pattern.table import Table, pprint # "X IS MORE IMPORTANT THAN Y" # Here is a rough example of how to build a web miner. # It mines comparative statements from Yahoo! and stores the results in a table, # which can be saved as a text file for further processing later on. # Pattern matching also works with Sentence objects from the MBSP module. # MBSP's parser is much more robust (but also slower). #from MBSP import Sentence, parse q = '"more important than"' # Yahoo search query p = "NP (VP) more important than NP" # Search pattern. p = Pattern.fromstring(p) t = Table() engine = Yahoo(license=None) for i in range(1): # max=10 for result in engine.search(q, start=i + 1, count=100, cached=True): s = result.description s = plaintext(s) s = Sentence(parse(s)) for m in p.search(s): a = m.constituents(constraint=0)[-1] # Left NP. b = m.constituents(constraint=5)[0] # Right NP. t.append((a.string.lower(), b.string.lower())) pprint(t) print print len(t), "results."
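# Added illustration (not in the original): the same search pattern
# applied locally to a single parsed sentence, without web requests.
from pattern.en import Sentence, parse
from pattern.search import Pattern

p = Pattern.fromstring("NP (VP) more important than NP")
s = Sentence(parse("Health is more important than money."))
for m in p.search(s):
    print m.constituents(constraint=0)[-1].string # Left NP, e.g. "Health".
    print m.constituents(constraint=5)[0].string  # Right NP, e.g. "money".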
from pattern.web import Twitter, Google, plaintext
from pattern.table import Table

t = Table()
for politician, party in (("nicolas sarkozy", "ump"), ("dsk", "ps")):
    for tweet in Twitter().search(politician):
        if tweet.language in ("nl", "fr"):
            s = plaintext(tweet.description)
            # Translate Dutch or French tweets to English with Google Translate.
            s = Google().translate(s, tweet.language, "en")
            #w = sum([sentiment_score(word) for word in s.split(" ")])
            t.append([politician, party, tweet.date, s])
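# The commented-out line above assumes a sentiment_score() helper that
# pattern does not provide. A hypothetical stand-in could look words up
# in a small hand-made polarity lexicon:
LEXICON = {"good": +1, "great": +2, "bad": -1, "terrible": -2}

def sentiment_score(word):
    return LEXICON.get(word.lower(), 0)

print sum([sentiment_score(w) for w in "a great movie".split(" ")]) # 2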