Example #1
import tweepy
from time import sleep

# `auth`, `api`, and the `TwitterStream` listener class are assumed to be
# defined elsewhere (see the setup sketch below).


def stream_crawl(keyword):
    # Retry the stream until a connection succeeds.
    loop = True
    while loop:
        track = [keyword]
        listen = TwitterStream(api)
        stream = tweepy.Stream(auth, listen)

        # Start the stream, filtering on the requested keyword
        print("Twitter streaming started... ")
        try:
            stream.filter(track=track)
            loop = False
        except Exception:
            print("Error!... Retrying after 5 sec")
            loop = True
            stream.disconnect()
            sleep(5)
            continue
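
The function above assumes a configured tweepy `auth` handler, an `api` object, and a `TwitterStream` stream-listener class. A minimal sketch of that setup, assuming tweepy 3.x and placeholder credentials (these names and values are assumptions, not part of the original example):

import tweepy

consumer_key = "YOUR-CONSUMER-KEY"          # placeholder
consumer_secret = "YOUR-CONSUMER-SECRET"    # placeholder
access_token = "YOUR-ACCESS-TOKEN"          # placeholder
access_token_secret = "YOUR-ACCESS-SECRET"  # placeholder

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

class TwitterStream(tweepy.StreamListener):
    # Minimal listener: print each status; a real listener would store or process it.
    def on_status(self, status):
        print(status.text)

stream_crawl("python")
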
Example #2
from TwitterStream import TwitterStream

#Driver code for the project

print "Main"
twitter = TwitterStream()
twitter.get_stream("Ivanka")
Example #3
try:
    proxy = os.environ["http_proxy"]
except KeyError:
    print "Not using a proxy. If you want to use a proxy, you need to do something like this"
    print "export http_proxy=http://www-cache.your.site.com:3128/"
    proxy = None

print username, password, proxy

pids = ["b00001", "b00002", "b00003"]
# keywords = [ "Sarah Jane", "CBBC"] #trending topics earlier

trends_url = "http://api.twitter.com/1/trends/current.json"
raw_trending = get_url(trends_url)
trending_cooked = cjson.decode(raw_trending)

print "Trending Topics", trending_cooked

trending_topics = [X["query"] for X in trending_cooked["trends"].values()[0]]

keywords = trending_topics

# request = [ pids, keywords ]
request = [keywords, pids]  # docstring wrong, should be this way round

Pipeline(
    DataSource([request]),
    TwitterStream(username=username, password=password, proxy=proxy),
    PureTransformer(lambda x: repr(x) + "\n"),
    ConsoleEchoer(),
).run()
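
The fragment above starts mid-script (`username`, `password`, and the `get_url` helper are defined earlier and elided here). A sketch of the imports it relies on, assuming the usual Kamaelia component layout; the exact module paths are assumptions, not taken from the original:

import os
import cjson  # Python 2 JSON codec used to decode the trends response
from Kamaelia.Chassis.Pipeline import Pipeline
from Kamaelia.Util.DataSource import DataSource            # assumed path
from Kamaelia.Util.PureTransformer import PureTransformer  # assumed path
from Kamaelia.Util.Console import ConsoleEchoer
from TwitterStream import TwitterStream  # project-local component
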
Example #4
from TwitterStream import TwitterStream
api_key = "Put the api key here"
api_secret = "Put the api secret here"
access_token_key = "Put the token key here"
access_token_secret = "Put the token secret here"

url = "https://stream.twitter.com/1.1/statuses/filter.json?locations=-122.995004,32.323198,-67.799695,49.893813 & language=en"

mytweet = TwitterStream(api_key=api_key,
                        api_secret=api_secret,
                        access_token_key=access_token_key,
                        access_token_secret=access_token_secret,
                        debug=0,
                        http_method="GET")

mytweet.fetchsamples(url=url, numberoftweets=25000)

output = mytweet.clean_tweets()
Example #5
                                                  10),
                         linkages={
                             ("self", "inbox"): ("LINKRESOLVE", "inbox"),
                             ("LINKRESOLVE", "outbox"): ("self", "outbox"),
                             ("LINKRESOLVE", "urlrequests"):
                             ("LINKREQUESTER", "inbox"),
                             ("LINKREQUESTER", "outbox"):
                             ("LINKRESOLVE", "responses")
                         }).activate()
 system = Graphline(
     CURRENTPROG=WhatsOn(proxy),
     REQUESTER=Requester(
         "all", dbuser, dbpass
     ),  # Can set this for specific channels to limit Twitter requests whilst doing dev
     FIREHOSE=TwitterStream(
         username, password, proxy, True, 40
     ),  # Twitter API sends blank lines every 30 secs so timeout of 40 should be fine
     SEARCH=PeopleSearch(consumerkeypair, keypair, proxy),
     COLLECTOR=DataCollector(dbuser, dbpass),
     RAWCOLLECTOR=RawDataCollector(dbuser, dbpass),
     HTTPGETTER=HTTPGetter(proxy, "BBC R&D Grabber", 10),
     HTTPGETTERRDF=HTTPGetter(proxy, "BBC R&D Grabber", 10),
     TWOWAY=TwoWaySplitter(),
     ANALYSIS=LiveAnalysis(dbuser, dbpass),
     NLTKANALYSIS=LiveAnalysisNLTK(dbuser, dbpass),
     TWEETCLEANER=Pipeline(
         LINKER, RetweetFixer(), RetweetCorrector(dbuser, dbpass),
         TweetCleaner(['user_mentions', 'urls', 'hashtags'])),
     NLTKANALYSISFINAL=FinalAnalysisNLTK(dbuser, dbpass),
     TWEETCLEANERFINAL=Pipeline(
         LINKERFINAL, RetweetFixer(), RetweetCorrector(dbuser, dbpass),