Example #1
0
def main():
    args = sys.argv[1:]

    if len(args) < 1:
        print "Tweet collections USAGE: missing \'trend\' argument"
        sys.exit(1)

    trend = args[0]
    if not trend or trend == '':
        print "Value for argument \'trend\' is either blank or d.n.e."
        sys.exit(1)

    print "Tweet collection for trend ", trend, '...'

    keyset = 1
    if len(args) > 1:
        keyset = int(args[1])

    globalobjs.init(keyset)
    auth = tweepy.OAuthHandler(globalobjs.consumer_key, globalobjs.consumer_secret)
    auth.set_access_token(globalobjs.access_token, globalobjs.access_token_secret)
    # api = tweepy.API(auth)

    sapi = tweepy.streaming.Stream(auth, MongoStreamListener(trend, globalobjs.db, globalobjs.getLogFile(trend)))
    try:
        sapi.filter(track=[trend])
    except(KeyboardInterrupt, SystemExit):
        print "User stopped with Ctrl+C"
    finally:
        print "ENTER FINALLY"
        globalobjs.destroy()
Example #2
0
def fetchTweets():
    query={}
    selector={}

    try:
        iter=crazydump.find()
    except:
        print "Could not fetch tweet", sys.exc.info()[0]

    #limit=crazydump.count()
    limit=40
    counter=0
    globalobjs.init()

    #print globalobjs.stopwords_list
    for tweetDoc in iter:
        print '\n', counter, '\n', tweetDoc['text'].encode('utf-8','ignore')
        tt=streamfilters.processTweetText(tweetDoc['text'])
        print tt
        counter+=1
        if(counter>=limit):
            break
Example #3
0
# from gensim import corpora, models, similarities
# import tweetcorpus
# import operator
# import itertools
import sys
import numpy
import logging
from datetime import datetime
from scipy import stats
import ldamodel
from resources import globalobjs

# Configure the root logger at import time: records at INFO level and above
# are emitted as "<timestamp> : <level> : <message>".
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
# Initialise the shared project globals at import time; comparison() below
# reads its LDA settings from globalobjs.
# NOTE(review): presumably init() must run before those attributes exist -- confirm.
globalobjs.init()

# TODO:
# Take the LDA parameters and their variations from command-line arguments:
#   - numtopics, numupdates, doc_chunk, corpus_passes
#   - offset, corpus length
#   - filters


def comparison(trend1, trend2, metric = "JS", filters1 = None, filters2 = None):
    numtopics = globalobjs.num_topics_lda
    numupdates = globalobjs.update_freq
    doc_chunk = globalobjs.lda_chunk_size
    corpus_passes = globalobjs.passes_corpus

    start = datetime(2014, 4, 26)
    # filters1 = {"timestamp": {"$gt": start}}
    filters1 = None