Example n. 1
0
engine = Twitter()

# With cached=False, a live request is sent to Twitter,
# so we get the latest results for the query instead of those in the local cache.
for tweet in engine.search("is cooler than", count=25, cached=False):
    print tweet.description
    print tweet.author
    print tweet.date
    print hashtags(tweet.description)  # Keywords in tweets start with a #.
    print
    # Create a unique ID based on the tweet content and author.
    id = hash(tweet.author + tweet.description)
    # Only add the tweet to the table if it doesn't already contain this ID.
    if len(table) == 0 or id not in index:
        table.append([id, tweet.description])
        index[id] = True

table.save("cool.txt")

print "Total results:", len(table)
print

# Print all the rows in the table.
# Since it is stored as a file it can grow comfortably each time the script runs.
# We can also open the table later on, in other scripts, for further analysis.
# pprint(table)

# Note: you can also search tweets by author:
# Twitter().search("from:tom_de_smedt")
Example n. 2
0
from pattern.web   import Twitter, Google, plaintext
from pattern.table import Table

# Collect tweets mentioning each French film title, storing the title,
# its category, the tweet date and the cleaned tweet text in a table.
t = Table()
for nomme, categorie in (("l'arnacoeur", "film"), ("le nom des gens", "film"),
                         ("the ghost writer", "film"), ("tournée", "film"),
                         ("des hommes et des dieux", "film"),
                         ("gainsbourg, vie héroique", "film"),
                         ("mammuth", "film")):
    for tweet in Twitter().search(nomme):
        s = plaintext(tweet.description)
        # Bug fix: "film" was an undefined name (NameError at runtime);
        # the loop variable holding the category is "categorie".
        t.append([nomme, categorie, tweet.date, s])
Example n. 3
0
# "X IS MORE IMPORTANT THAN Y"
# Here is a rough example of how to build a web miner.
# It mines comparative statements from Yahoo! and stores the results in a table,
# which can be saved as a text file for further processing later on.

# Pattern matching also works with Sentence objects from the MBSP module.
# MBSP's parser is much more robust (but also slower).
#from MBSP import Sentence, parse

q = '"more important than"'          # Yahoo search query
p = "NP (VP) more important than NP" # Search pattern.
p = Pattern.fromstring(p)
t = Table()

engine = Yahoo(license=None)
for i in range(1): # max=10
    for result in engine.search(q, start=i+1, count=100, cached=True):
        s = result.description
        s = plaintext(s)
        s = Sentence(parse(s))
        for m in p.search(s):
            a = m.constituents(constraint=0)[-1] # Left NP.
            b = m.constituents(constraint=5)[ 0] # Right NP.
            t.append((
                a.string.lower(), 
                b.string.lower()))

pprint(t)

print
print len(t), "results."
Example n. 4
0
from pattern.table import Table, pprint

# "X IS MORE IMPORTANT THAN Y"
# Here is a rough example of how to build a web miner.
# It mines comparative statements from Yahoo! and stores the results in a table,
# which can be saved as a text file for further processing later on.

# Pattern matching also works with Sentence objects from the MBSP module.
# MBSP's parser is much more robust (but also slower).
#from MBSP import Sentence, parse

q = '"more important than"'  # Yahoo search query
p = "NP (VP) more important than NP"  # Search pattern.
p = Pattern.fromstring(p)
t = Table()

engine = Yahoo(license=None)
for i in range(1):  # max=10
    for result in engine.search(q, start=i + 1, count=100, cached=True):
        s = result.description
        s = plaintext(s)
        s = Sentence(parse(s))
        for m in p.search(s):
            a = m.constituents(constraint=0)[-1]  # Left NP.
            b = m.constituents(constraint=5)[0]  # Right NP.
            t.append((a.string.lower(), b.string.lower()))

pprint(t)

print
print len(t), "results."
Example n. 5
0
from pattern.web import Twitter, Google, plaintext
from pattern.table import Table

# Collect tweets mentioning each French film title, storing the title,
# its category, the tweet date and the cleaned tweet text in a table.
t = Table()
for nomme, categorie in (("l'arnacoeur", "film"), ("le nom des gens", "film"),
                         ("the ghost writer", "film"), ("tournée", "film"),
                         ("des hommes et des dieux", "film"),
                         ("gainsbourg, vie héroique", "film"),
                         ("mammuth", "film")):
    for tweet in Twitter().search(nomme):
        s = plaintext(tweet.description)
        # Bug fix: "film" was an undefined name (NameError at runtime);
        # the loop variable holding the category is "categorie".
        t.append([nomme, categorie, tweet.date, s])
Example n. 6
0
from pattern.web   import Twitter, Google, plaintext
from pattern.table import Table

# Collect Dutch/French tweets about each politician, translated to English,
# together with the politician's name, party and the tweet date.
t = Table()
for politician, party in (("nicolas sarkozy", "ump"), ("dsk", "ps")):
    for tweet in Twitter().search(politician):
        # Skip tweets that are not in Dutch or French.
        if tweet.language not in ("nl", "fr"):
            continue
        text = plaintext(tweet.description)
        text = Google().translate(text, tweet.language, "en")
#        w = sum([sentiment_score(word) for word in text.split(" ")])
        t.append([politician, party, tweet.date, text])
Example n. 7
0
engine = Twitter()

# With cached=False, a live request is sent to Twitter,
# so we get the latest results for the query instead of those in the local cache.
for tweet in engine.search("is cooler than", count=25, cached=False):
    print tweet.description
    print tweet.author
    print tweet.date
    print hashtags(tweet.description)  # Keywords in tweets start with a #.
    print
    # Create a unique ID based on the tweet content and author.
    id = hash(tweet.author + tweet.description)
    # Only add the tweet to the table if it doesn't already contain this ID.
    if len(table) == 0 or id not in index:
        table.append([id, tweet.description])
        index[id] = True

table.save("cool.txt")

print "Total results:", len(table)
print

# Print all the rows in the table.
# Since it is stored as a file it can grow comfortably each time the script runs.
# We can also open the table later on, in other scripts, for further analysis.
#pprint(table)

# Note: you can also search tweets by author:
# Twitter().search("from:tom_de_smedt")