# Mine Twitter for tweets containing "eulogy", paging through results
# and accumulating unique tweets in a persistent table.
for page in range(2):
    print(page)
    for tweet in engine.search("eulogy", start=prev, count=25, cached=False):
        print("")
        print(tweet.text)
        print(tweet.author)
        print(tweet.date)
        print(hashtags(tweet.text))  # Keywords in tweets start with a "#".
        print("")
        # Only add the tweet to the table if it isn't in there already.
        if len(table) == 0 or tweet.id not in index:
            table.append([tweet.id, tweet.text])
            index.add(tweet.id)
        # Remember the last seen id so the next iteration mines older tweets.
        prev = tweet.id

# Create a .csv in pattern/examples/01-web/
table.save(pd("eulogy_july_21.csv"))

print("Total results: %s" % len(table))
print("")

# Print all the rows in the table.
# Since it is stored as a CSV-file it grows comfortably each time the script runs.
# We can also open the table later on: in other scripts, for further analysis, ...
pprint(table, truncate=100)

# Note: you can also search tweets by author:
# Twitter().search("from:tom_de_smedt")
# Mine Twitter for tweets containing "is cooler than" and accumulate
# unique tweets in a persistent table.
# NOTE(review): this fragment starts with a dangling print(i) — it appears
# to be the body of an outer paging loop that defines `i` and `prev`;
# confirm against the surrounding file.
print(i)
for tweet in engine.search("is cooler than", start=prev, count=25, cached=False):
    print()
    # Print the text directly: under Python 3, encoding it first would
    # print a bytes repr (b'...') instead of the readable tweet.
    print(tweet.text)
    print(tweet.author)
    print(tweet.date)
    print(hashtags(tweet.text))  # Keywords in tweets start with a "#".
    print()
    # Only add the tweet to the table if it isn't in there already.
    if len(table) == 0 or tweet.id not in index:
        table.append([tweet.id, tweet.text])
        index.add(tweet.id)
    # Remember the last seen id so the next iteration mines older tweets.
    prev = tweet.id

# Create a .csv in pattern/examples/01-web/
table.save(pd("cool.csv"))

print("Total results:", len(table))
print()

# Print all the rows in the table.
# Since it is stored as a CSV-file it grows comfortably each time the script runs.
# We can also open the table later on: in other scripts, for further
# analysis, ...
pprint(table, truncate=100)

# Note: you can also search tweets by author:
# Twitter().search("from:tom_de_smedt")
("id", INTEGER), # Define the column headers. ("name", STRING), ("type", STRING) ]) print ds.rows[0] # A list of rows. print ds.columns[1] # A list of columns, where each column is a list of values. print ds.name print # Columns can be manipulated directly like any other Python list. # This can be slow for large tables. If you need a fast way to do matrix math, # use numpy (http://numpy.scipy.org/) instead. # The purpose of Table is data storage. ds.columns.append([ "green", "purple", "white", "yellow" ], field=("color", STRING)) # Save as a comma-separated (unicode) text file. ds.save("food.txt", headers=True) # Load a table from file. ds = Datasheet.load("food.txt", headers=True) pprint(ds, truncate=50, padding=" ", fill=".") print print ds.fields
# "X IS MORE IMPORTANT THAN Y"
# Here is a rough example of how to build a web miner.
# It mines comparative statements from Bing and stores the results in a table,
# which can be saved as a text file for further processing later on.

# Pattern matching also works with Sentence objects from the MBSP module.
# MBSP's parser is much more robust (but also slower).
#from MBSP import Sentence, parse

q = '"more important than"'            # Bing search query.
p = "NP (VP) more important than NP"   # Search pattern.
p = Pattern.fromstring(p)
d = Datasheet()

engine = Bing(license=None)
for i in range(1):  # max=10
    for result in engine.search(q, start=i + 1, count=100, cached=True):
        s = result.description
        s = plaintext(s)
        s = Sentence(parse(s))
        # Each match binds the constraints of the pattern to constituents;
        # constraint 0 is the left NP, constraint 5 the right NP.
        for m in p.search(s):
            a = m.constituents(constraint=0)[-1]  # Left NP.
            b = m.constituents(constraint=5)[0]   # Right NP.
            d.append((
                a.string.lower(),
                b.string.lower()))

pprint(d)
# Converted the Python-2 print statements to print() calls so the script
# runs under Python 3, consistent with the rest of the file.
print()
print(len(d), "results.")