def search(kw):
    begin = int(request.args['begin'])
    end = int(request.args['end'])
    if begin == 0:
        try:
            tso = TwitterSearchOrder() # create a TwitterSearchOrder object
            tso.set_language('en')
            tso.set_keywords([kw]) # let's define all words we would like to have a look for
            tso.set_include_entities(False) # and don't give us all those entity information

            # it's about time to create a TwitterSearch object with our secret tokens
            ts = TwitterSearch(
                consumer_key = Tconsumer_key,
                consumer_secret = Tconsumer_secret,
                access_token = Taccess_token,
                access_token_secret = Taccess_token_secret
            )

            # this is where the fun actually starts :)
            ts.search_tweets(tso)
            oldTweets = Tweets.get_one(keyword=kw)
            if oldTweets is None:
                tweets = Tweets(keyword=kw, tw=ts.get_tweets()['statuses'])
                tweets.save()
            else:
                oldTweets.tw = ts.get_tweets()['statuses']
                oldTweets.save()
        except TwitterSearchException as e: # take care of all those ugly errors if there are some
            print(e)
            return jsonify(error=1)
    tweets = Tweets.get_one(keyword=kw).tw
    return jsonify(tweets=tweets[begin:end])
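
A minimal sketch of how this view could be wired into a Flask app; the route, the app object and the query-string usage are assumptions and not part of the example above.

# Hypothetical wiring for the view above (app/route names are assumptions):
# GET /search/<kw>?begin=0&end=20 returns the first 20 cached tweets for <kw>.
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/search/<kw>')
def search_endpoint(kw):
    return search(kw)  # delegates to the search() view defined above
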
    def get_data(self, config):
        count = int(config.get('count', 50))

        # Configure the search query
        tso = TwitterSearchOrder()
        tso.set_keywords([config['query']])
        tso.set_include_entities(False)

        # Configure the search
        ts = TwitterSearch(consumer_key=config['consumer_key'],
                           consumer_secret=config['consumer_secret'],
                           access_token=config['access_token'],
                           access_token_secret=config['access_secret'])

        data = dict()
        data['query'] = config['query']
        data['tweets'] = list()

        # Do the search
        result = ts.search_tweets(tso)['content']['statuses']

        # Filter results into more usable format
        for tweet in result[:count]:
            filtered_tweet = dict()
            filtered_tweet['username'] = '******' + tweet['user']['screen_name']
            filtered_tweet['tweet'] = tweet['text']
            data['tweets'].append(filtered_tweet)

        return data
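
A hedged usage sketch for the get_data() method above; the owning class name and the credential values are placeholders, not taken from the example.

# Hypothetical usage of get_data(); TwitterPlugin and the credentials are placeholders.
plugin = TwitterPlugin()
config = {
    'query': '#python',
    'count': 10,
    'consumer_key': '<key>',
    'consumer_secret': '<secret>',
    'access_token': '<token>',
    'access_secret': '<token secret>',
}
data = plugin.get_data(config)
for tweet in data['tweets']:
    print(tweet['username'], tweet['tweet'])
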
Example 3
def search(search):
    twitter_utils = twitter.Twitter()
    credentials = session.get('credentials')
    search = [search]
    try:
        tso = TwitterSearchOrder()
        tso.set_language('en')
        tso.set_keywords(search)
        tso.set_include_entities(False)  # remove this later if you want to use images/entities
        query = TwitterSearch(consumer_key=session['consumer_key'],
                              consumer_secret=session['consumer_secret'],
                              access_token=session['token'],
                              access_token_secret=session['token_secret'])
        response = query.search_tweets(tso)
        t_range = datetime.now(pytz.utc) - timedelta(minutes=TIME_RANGE)
        tweets = [
            t for t in response['content']['statuses']
            if twitter_utils.get_date(t) >= t_range
        ]
        print("Current rate-limiting status: " +
              str(query.get_metadata()['x-rate-limit-reset']))
        return render_template(
            "page.html",
            search=search,
            tweets=tweets,
            music_config=twitter_utils.get_music_config(tweets))
    except TwitterSearchException as e:
        return str(e)
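
twitter.Twitter() above is a project-local helper whose code is not shown; a minimal sketch of what its get_date() might do, assuming Twitter's standard created_at format, is:

# Hypothetical sketch of the get_date() helper used above; the real twitter.Twitter
# class is project-specific and not shown in this example.
from datetime import datetime

def get_date(tweet):
    # created_at looks like "Wed Aug 27 13:08:45 +0000 2008"
    return datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S %z %Y')
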
Example 4
def userSearch(user):
    ts = TwitterSearch(
        consumer_key='UIBl6otwQD9CtbhRQSQ2GlV8H',
        consumer_secret='MlxVNNZDWfEDBpOTbZwOAPQ8BziP3tcQwMoU3vXdxllzsdgjLu',
        access_token='85289745-4PknFj4zSUPd12rbIg8ZkPnAAewZCEmwXj3wyNbiO',
        access_token_secret='A0RNhwgoVh0okZQoL5w6UydpplyTSft1Sx6QCZ4TtvaAC')
    tuo = TwitterUserOrder(user)  # create a TwitterUserOrder
    # start asking Twitter about the timeline
    return ts.search_tweets(tuo)
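
A short usage note: to walk more than one page of a user's timeline, the library's search_tweets_iterable() generator can be used instead of a single search_tweets() call. A hedged sketch, reusing a TwitterSearch object built as in userSearch() above:

# Hypothetical usage: page through a whole timeline instead of fetching one page.
# Assumes ts is a TwitterSearch object built as in userSearch() above.
tuo = TwitterUserOrder('twitterapi')
for tweet in ts.search_tweets_iterable(tuo):
    print(tweet['user']['screen_name'], tweet['text'])
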
Example 5
def userSearch(user):
    ts = TwitterSearch(
            consumer_key = 'UIBl6otwQD9CtbhRQSQ2GlV8H',
            consumer_secret = 'MlxVNNZDWfEDBpOTbZwOAPQ8BziP3tcQwMoU3vXdxllzsdgjLu',
            access_token = '85289745-4PknFj4zSUPd12rbIg8ZkPnAAewZCEmwXj3wyNbiO',
            access_token_secret = 'A0RNhwgoVh0okZQoL5w6UydpplyTSft1Sx6QCZ4TtvaAC'
         )
    tuo = TwitterUserOrder(user) # create a TwitterUserOrder
    # start asking Twitter about the timeline
    return ts.search_tweets(tuo)
Example 6
def search_and_archive():
    todo = True
    results = []
    next_max_id = 0
    since_id = get_since_id()

    tso = TwitterSearchOrder()
    tso.add_keyword(TW_SEARCH_KEYWORD)
    tso.set_result_type('recent')
    if since_id > 0:
        tso.set_since_id(since_id)

    ts = TwitterSearch(consumer_key=TW_CONSUMER_KEY,
                       consumer_secret=TW_CONSUMER_SECRET,
                       access_token=TW_ACCESS_TOKEN,
                       access_token_secret=TW_ACCESS_SECRET)
    while (todo):
        print('Current MaxID is ' + str(next_max_id))
        response = ts.search_tweets(tso)
        todo = not len(response['content']['statuses']) == 0
        for tweet in response['content']['statuses']:
            result = {}
            for key in mp:
                result[key] = str(nest_get(mp[key], tweet))
            result.update({
                'time': format_timestamp(result['created_at']),
                'created_at': format_timestamp(result['created_at']),
                'rt_created_at': format_timestamp(result['rt_created_at']),
                'qs_created_at': format_timestamp(result['qs_created_at']),
                'primary_url': pick_primary_url(result['entities_urls']),
                'rt_primary_url': pick_primary_url(result['rt_entities_urls']),
                'qs_primary_url': pick_primary_url(result['qs_entities_urls']),
            })
            results.append(result)

            if (tweet['id'] < next_max_id) or (next_max_id == 0):
                next_max_id = tweet['id']
                next_max_id -= 1

        records = len(results)
        if records > 450 or todo is not True:
            bulk_load(results)
            results = []
            print('processed ' + str(records) + ' records.')

        tso.set_max_id(next_max_id)
        sleep(2)
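
The mp mapping and the nest_get() helper used above are not shown; a plausible sketch of nest_get(), assuming mp maps output keys to dotted paths into the tweet dict, is:

# Hypothetical sketch of the nest_get() helper used above; mp is assumed to map
# output column names to dotted paths such as 'user.screen_name'.
def nest_get(path, tweet):
    value = tweet
    for key in path.split('.'):
        if not isinstance(value, dict) or key not in value:
            return ''
        value = value[key]
    return value
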
Example 7
def searchTweet(mySearch, lang, isSaved, filePath, isSentToKafka, kafkaProd,
                topic):
    try:
        now = str(datetime.datetime.now()).replace(" ", "T")
        if isSaved:
            fd = open(filePath, 'w')
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords(mySearch)  # define all the words we would like to look for
        tso.set_language(lang)  # restrict results to the requested language
        tso.set_include_entities(False)  # and don't give us all that entity information

        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(consumer_key=consumerKey,
                           consumer_secret=consumerSecret,
                           access_token=accessTokenKey,
                           access_token_secret=accessTokenSecret)

        # this is where the fun actually starts :)
        for tweet in ts.search_tweets_iterable(tso):
            data = {
                "id": tweet["id"],
                "user": tweet["user"]["screen_name"],
                "sentence": tweet["text"],
                "country": getCountry(tweet["place"]),
                "lang": tweet["lang"],
                "retweetCount": tweet["retweet_count"],
                "isRetweet": tweet["retweeted"],
                "keySearch": mySearch[0],
                "eventDate": now
            }
            if isSaved:
                fd.write(json.dumps(data) + "\n")
                fd.flush()
            if isSentToKafka:
                kafkaProd.send(topic, json.dumps(data).encode("utf-8"))
        if isSaved:
            fd.close()
        return ts.search_tweets(tso)

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
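
A hedged usage sketch showing how the kafkaProd argument could be built with kafka-python; the broker address and topic name are placeholders.

# Hypothetical usage of searchTweet() with kafka-python; broker and topic are placeholders.
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092')
searchTweet(['python'], 'en', isSaved=False, filePath=None,
            isSentToKafka=True, kafkaProd=producer, topic='tweets')
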
Example 8
def hashtagSearch(hashtag):
    ts = TwitterSearch(
            consumer_key = 'UIBl6otwQD9CtbhRQSQ2GlV8H',
            consumer_secret = 'MlxVNNZDWfEDBpOTbZwOAPQ8BziP3tcQwMoU3vXdxllzsdgjLu',
            access_token = '85289745-4PknFj4zSUPd12rbIg8ZkPnAAewZCEmwXj3wyNbiO',
            access_token_secret = 'A0RNhwgoVh0okZQoL5w6UydpplyTSft1Sx6QCZ4TtvaAC'
         )
    try:
        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.set_keywords([hashtag]) # let's define all words we would like to have a look for
        tso.set_include_entities(False) # and don't give us all those entity information

        # the TwitterSearch object was already created with our secret tokens above,
        # so this is where the fun actually starts :)
        return ts.search_tweets(tso)
    except TwitterSearchException as e: # take care of all those ugly errors if there are some
        print(e)
Example 9
def hashtagSearch(hashtag):
    ts = TwitterSearch(
        consumer_key='UIBl6otwQD9CtbhRQSQ2GlV8H',
        consumer_secret='MlxVNNZDWfEDBpOTbZwOAPQ8BziP3tcQwMoU3vXdxllzsdgjLu',
        access_token='85289745-4PknFj4zSUPd12rbIg8ZkPnAAewZCEmwXj3wyNbiO',
        access_token_secret='A0RNhwgoVh0okZQoL5w6UydpplyTSft1Sx6QCZ4TtvaAC')
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords([hashtag])  # define all the words we would like to look for
        tso.set_include_entities(False)  # and don't give us all that entity information

        # the TwitterSearch object was already created with our secret tokens above,
        # so this is where the fun actually starts :)
        return ts.search_tweets(tso)
    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
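
A short sketch of consuming the value returned by hashtagSearch(); the 'content'/'statuses' layout matches the raw search response used in the other examples here.

# Hypothetical usage of hashtagSearch(): print the text of each returned tweet.
response = hashtagSearch('#python')
if response is not None:  # the function returns None after printing an exception
    for tweet in response['content']['statuses']:
        print(tweet['text'])
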
Example 10
     # this is where the fun actually starts :)
#     for tweet in ts.search_tweets_iterable(tso):
#         print( '%s' % ( tweet['text'] ) )

# except TwitterSearchException as e: # take care of all those ugly errors if there are some
#     print(e)
    todo = True
    next_max_id = 0
    i = 0
    f = open('tweet.txt', 'a')
    # let's start the action
    while todo:

        # first query the Twitter API
        response = ts.search_tweets(tso)

        # print rate limiting status
        #print( "Current rate-limiting status: %s" % ts.get_metadata()['x-rate-limit-reset'])

        # check if there are statuses returned and whether we still have work to do
        todo = not len(response['content']['statuses']) == 0
        
        # check all tweets according to their ID
        for tweet in response['content']['statuses']:
            tweet_id = tweet['id']
            a = tweet['full_text']
            i = i + 1
            print(i)
            f.write(a + '\n')  # append the tweet text to tweet.txt

            # current ID is lower than current next_max_id?
            if (tweet_id < next_max_id) or (next_max_id == 0):
                next_max_id = tweet_id
                next_max_id -= 1  # decrement to avoid seeing this tweet again

        # set the lowest ID as max_id so the next request continues where we left off
        tso.set_max_id(next_max_id)

    f.close()
Example 11
ts = TwitterSearch(
    consumer_key = 'F8QnORsYQqXiY2zu7FDmxfRsL',
    consumer_secret = '5v7l0GxfUb0ukigxTsvwiGungqWfZjBGv5fSygnXOUGH5JFbph',
    access_token = '17522673-AmQBBeDcHwkwJVtkwxhOW8iESiW5xoVlZeLNMOEIp',
    access_token_secret = 'hRyBq99BDmjXFaCnKTZ3YGVEUaFQz9bP0T7whvV0FFOv7'
)

onehourearlier = datetime.datetime.now() - datetime.timedelta(hours=4)
# onehourearlier = datetime.datetime.now()

currTime = onehourearlier.strftime('%l:%M%p').replace(' ', '')
search_word = '#' + currTime
msg = ''  # fallback so the check below still works if the search raises an exception

tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
tso.set_keywords([search_word])
try:
    response = ts.search_tweets(tso)
    tweet = response['content']['statuses'][0]
    msg = '@%s: %s' % ( tweet['user']['screen_name'].encode('utf-8'), tweet['text'].encode('utf-8') )

except TwitterSearchException as e:
    print(e)

if currTime.lower() in msg.lower():
    print msg
    # Push to Parse
    connection = httplib.HTTPSConnection('api.parse.com', 443)
    connection.connect()
    params = urllib.urlencode({"where":json.dumps({
            "timestamp": currTime
         })})
    connection.request('GET', '/1/classes/tweets?%s' % params, '', {
Example 12
        if (domainName.find('twitter.com') == -1):
            success = True
    return success


try:
    tso = TwitterSearchOrder()
    tso.set_keywords(['cyclist', 'killed'])
    tso.set_language('en')
    tso.set_include_entities(True)

    ts = TwitterSearch(
        consumer_key='FSRqsdZnVc0vQaosOjNE7Gb4z',
        consumer_secret='8Vu8HqCXnVSuLvOZt3RJmlK8HVP9z5zsh7Qk9k4PcZjeNsAHnA',
        access_token='1018148114-G4apdnYp0htOcwYb2CBhnZkI8d619T40g8jUNEM',
        access_token_secret='ZVwZGXmox8mxWYpQrC6A8BhfiFIbT7Q3jlLiXwUE1v1q4')

    mysearchResp = ts.search_tweets(tso)
    contentOnly = mysearchResp['content']['statuses']
    filterResp = [
        x for x in contentOnly if len(x['entities']['urls']) > 0
        and contains_nonTwitter_domain(x['entities']['urls'])
    ]

    f = open('output.txt', 'w', encoding='utf8')
    f.write(str(filterResp))
    f.close()

except TwitterSearchException as e:
    print(e)
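
Only the tail of contains_nonTwitter_domain() is visible at the top of this example; a hedged reconstruction consistent with that fragment could be:

# Hypothetical reconstruction of contains_nonTwitter_domain(); only its last lines
# are visible above, so the URL-parsing details here are assumptions.
from urllib.parse import urlparse

def contains_nonTwitter_domain(urls):
    success = False
    for url in urls:
        domainName = urlparse(url.get('expanded_url', '')).netloc
        if domainName.find('twitter.com') == -1:
            success = True
    return success
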
Example 13
def queryTwitter(records, outputFileIndex, totalRunTime, writeToFileTime, sleepTime):
    n = GeoLocator()
    req = 0
    next_max_id = 0
    startTime = time.time()
    lastWriteTime = startTime
    tso = None
    ts = None
#     while time.time() - startTime < totalRunTime:
    while True:
        try:
            now = time.time()
            print 'Total running time: ' + str(now - startTime) + ' seconds'
            # Check if it is time to write to file
            if now - lastWriteTime > writeToFileTime:
                print 'Writing to CSV ' + str(len(records)) + ' Tweets'
                records, outputFileIndex = saveRecordsToCSV(records, outputFileIndex)
                lastWriteTime = now
            # If this is the first run, or we are recovering after an exception,
            # create a new Twitter search object
            if tso is None:
                tso = TwitterSearchOrder()
                tso.set_keywords([''])
                #tso.setLanguage('en')
                tso.set_count(100)
                tso.set_include_entities(False)
                tso.set_geocode(40.69, -73.94, 20, imperial_metric=False)
                #tso.setUntil(datetime.date(2014, 03, 24))        
                ts = TwitterSearch(consumer_key='FqjFRT1OHl6xyIGoq9uXSA',
                                   consumer_secret='KuhoVREmf7ngwjOse2JOLJOVXNCi2IVEzQZu2B8',
                                   access_token='114454541-xcjy2sbl7Rr4oIaogsaBrlVL5H4CvcdvOSMy3MnR',
                                   access_token_secret='yyBBOJhxgfw9pezZda2hWF94doONSd50y0JoylYjL3rmY', verify=False)

            # Query the Twitter API  
            text_file = open('json_tweets' + str(outputFileIndex) + '.txt', 'a')
            text_fileE = open('error_log.txt', 'a')
            req += 1
            print 'Request # ' + str(req)
            response = ts.search_tweets(tso)

            # check all tweets according to their ID
            for tweet in response['content']['statuses']:
                text_file.write(str(tweet))
                text_file.write('\n')
                tup = ()
                tweet_id = tweet['id']
                tup = tup + (tweet_id, )
                tup = tup + (str(tweet['created_at']), )
                tup = tup + (str(tweet['user']['created_at']), )
                tup = tup + (str(tweet['user']['screen_name']), )
                tup = tup + (str(tweet['user']['location'].encode('ascii', 'ignore')), )
                tup = tup + (str(tweet['user']['geo_enabled']), )
                tup = tup + (str(tweet['place']), )
                tup = tup + (str(tweet['geo']), )
                tup = tup + (str(tweet['text'].encode('ascii', 'ignore')), )
                tup = tup + (str(tweet['retweet_count']), )
                # Save only tweets with Geo within NYC or without geo at all
                try:
                    geoObj = yaml.load(tup[7])
                    lat = geoObj["u'coordinates'"][0]
                    long = geoObj["u'coordinates'"][1]
                    if n.isNYC(lat,long):
                        records.append(tup)
                except:
                    records.append(tup)

                # current ID is lower than current next_max_id?
                if tweet_id < next_max_id or next_max_id == 0:
                    next_max_id = tweet_id
                    next_max_id -= 1  # decrement to avoid seeing this tweet again

            # set lowest ID as MaxID
            tso.set_max_id(next_max_id)
            
            print 'Number of Tweets in memory: ' + str(len(records))
            print 'Sleeping...'
            time.sleep(sleepTime)
                           
        except TwitterSearchException, e:
            print e
            if len(records) == 0:
                next_max_id = 0
            if text_file.closed:
                pass
            else:
                text_file.close()
            outputFileIndex = getFile_index()
            text_fileE.write(str(e))
            text_fileE.write('\n')
            text_fileE.close()
            print 'sleeping after error...'
            time.sleep(900)
            # Set tso to None to create new Twitter search object 
            tso = None
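
saveRecordsToCSV() and getFile_index() are project helpers that are not shown; a minimal sketch of the CSV writer, assuming each record is the tuple built in the loop above, might be:

# Hypothetical sketch of saveRecordsToCSV(); the column order is the tuple built
# in the loop above and the file-naming scheme is an assumption.
import csv

def saveRecordsToCSV(records, outputFileIndex):
    with open('tweets_%d.csv' % outputFileIndex, 'w') as f:
        writer = csv.writer(f)
        writer.writerows(records)
    return [], outputFileIndex + 1  # reset the in-memory buffer, move to the next file
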
Example 14
def run_tweet_scrub(keywords):
    print 'Running scrub for keywords: ' + str(keywords)
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords(keywords)
        tso.set_language('en')
        tso.set_include_entities(True)

        config_parse = ConfigParser.ConfigParser()
        config_parse.read('config.ini')

        ts = TwitterSearch(
            consumer_key=config_parse.get('keys', 'consumer_key'),
            consumer_secret=config_parse.get('keys', 'consumer_secret'),
            access_token=config_parse.get('keys', 'access_token'),
            access_token_secret=config_parse.get('keys',
                                                 'access_token_secret'),
        )

        scrub = Scrub(RunDateTime=datetime.datetime.now(),
                      ScrubType='Twitter',
                      ScrubTypeId=1,
                      SearchKeywords=', '.join(keywords))
        scrub.save()

        mysearchResp = ts.search_tweets(tso)
        contentOnly = mysearchResp['content']['statuses']
        filter_resp = [
            x for x in contentOnly if len(x['entities']['urls']) > 0
            and contains_nonTwitter_domain(x['entities']['urls'])
        ]
        num_related = 0
        scrub.NumCandidates = len(filter_resp)

        for candidate in filter_resp:
            twit_url = get_expanded_url(candidate['entities']['urls'])
            if IncidentSourceCandidate.select().where(
                    IncidentSourceCandidate.URL == twit_url).count() > 0:
                print('Continued on URL: ' + twit_url)
                continue

            g = Goose()
            article = g.extract(url=twit_url)
            try:
                twit_id = candidate['id']
                source_candidate = IncidentSourceCandidate(
                    URL=twit_url,
                    Domain=article.domain,
                    ArticleText=article.cleaned_text,
                    ArticleTitle=article.title,
                    Scrub=scrub,
                    SearchFeedId=twit_id,
                    SearchFeedURL=get_tweet_url(candidate),
                    SearchFeedText=candidate['text'].encode('utf8'))
                source_candidate.ArticleTitle = source_candidate.ArticleTitle.replace("’", "'")
                source_candidate.ArticleText = source_candidate.ArticleText.replace("’", "'")
                source_candidate.SearchFeedJSON = candidate
                if (article.opengraph is not None) and ('site_name'
                                                        in article.opengraph):
                    source_candidate.Name = article.opengraph['site_name']
                if source_is_related(source_candidate):
                    source_candidate.IsRelated = True
                    num_related += 1
                source_candidate.ArticleTitle = source_candidate.ArticleTitle.encode('ascii', 'ignore')
                source_candidate.ArticleText = source_candidate.ArticleText.encode('ascii', 'ignore')
                source_candidate.save()
                print(source_candidate.SearchFeedText)
            except Exception as e:
                print(twit_url)
                print(type(article.cleaned_text))
                print(e)

        scrub.NumRelatedCandidates = num_related
        scrub.save()

    except TwitterSearchException as e:
        print(e)

    print '\n'
    print '--------------------------------------'
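
get_expanded_url() and get_tweet_url() are helpers not shown in this example; plausible sketches, with the status-URL format as an assumption, are:

# Hypothetical sketches of the URL helpers used above; formats are assumptions.
def get_expanded_url(urls):
    return urls[0]['expanded_url'] if urls else None

def get_tweet_url(tweet):
    return 'https://twitter.com/%s/status/%s' % (tweet['user']['screen_name'], tweet['id'])
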