def main():
    running_topic_list = get_all_running_topics_list()
    twitter_module = TwitterListen()
    twitter_module.setup(running_topic_list)
    current_hour = datetime.now().hour
    # Remember the last tweet sequence id so a stalled stream can be detected later.
    try:
        last_sequence_id = str(Connection.Instance().db["counters"].find_one(
            {'_id': "tweetDBId"})['seq'])
    except:
        last_sequence_id = 0
        pass
    count = 0
    while True:
        print("Loop is continuing. count = {0}".format(count))
        count += 1
        sleep(300)
        # Restart the stream whenever the set of running topics changes.
        new_running_topic_list = get_all_running_topics_list()
        if new_running_topic_list != running_topic_list:
            running_topic_list = new_running_topic_list
            print("Restarting Twitter Module!")
            twitter_module.restart(new_running_topic_list)
        # Every 6th iteration (~30 minutes) check whether new tweets arrived;
        # if the sequence counter did not move, assume the stream died and restart it.
        if count % 6 == 0:
            new_last_sequence_id = str(
                Connection.Instance().db["counters"].find_one(
                    {'_id': "tweetDBId"})['seq'])
            print("last_id = {0}, new_last_id = {1}".format(
                last_sequence_id, new_last_sequence_id))
            if last_sequence_id == new_last_sequence_id:
                running_topic_list = new_running_topic_list
                print("Unexpectedly Stopped Module, Restarting...")
                twitter_module.restart(new_running_topic_list)
            last_sequence_id = new_last_sequence_id
def getInfluencers(themename, themeid):
    try:
        themeid = int(themeid)
    except:
        pass
    # Resolve the topic name from PostgreSQL when only the id was supplied.
    if (str(themeid) != "None") and (themename == "None"):
        with Connection.Instance().get_cursor() as cur:
            sql = (
                "SELECT topic_name "
                "FROM topics "
                "WHERE topic_id = %s;"
            )
            cur.execute(sql, [themeid])
            var = cur.fetchall()
            themename = var[0][0]
    result = {}
    if themeid != "None" or themename != "None":
        # Map display names to the collection names used in the influencers DB.
        if themename == "arduino":
            themename = "Arduino"
        elif themename == "raspberry pi":
            themename = "RaspberryPi"
        elif themename == "3d printer":
            themename = "Printer"
        influencers = list(
            Connection.Instance().infDB[str(themename)].find(
                {"type": "filteredUser"}, {"_id": 0, "type": 0}))
        result['influencers'] = influencers
    else:
        result['influencers'] = "theme not found"
    return json.dumps(result, indent=4)
def publish_tweet(topic_id, tweet, url, access_token, access_token_secret):
    api = get_twitter_api(access_token, access_token_secret)
    text = tweet['body'] + " " + url
    try:
        s = api.update_status(text)
        id_str = s.id_str
        original_tweet = s._json
        link = "https://twitter.com/statuses/" + id_str
        # Mark the tweet as published and store the link and the raw API response.
        Connection.Instance().tweetsDB[str(topic_id)].update_one(
            {'tweet_id': tweet['tweet_id']},
            {'$set': {
                'status': 1,
                'tweet_link': link,
                'tweet': original_tweet
            }},
            upsert=True)
    except Exception as e:
        print(e)
        # Publishing failed; flag the tweet so it is no longer selected as pending.
        Connection.Instance().tweetsDB[str(topic_id)].update_one(
            {'tweet_id': tweet['tweet_id']},
            {'$set': {
                'status': -1
            }},
            upsert=True)
        pass
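# A minimal sketch of the get_twitter_api() helper used by publish_tweet().
# The update_status()/.id_str/._json usage suggests tweepy, but this is an
# assumption; CONSUMER_KEY / CONSUMER_SECRET are hypothetical config names.
import tweepy

def get_twitter_api(access_token, access_token_secret):
    auth = tweepy.OAuthHandler(config("CONSUMER_KEY"), config("CONSUMER_SECRET"))
    auth.set_access_token(access_token, access_token_secret)
    return tweepy.API(auth)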
def main():
    while True:
        users = get_users()
        for user_id in users:
            tokens = get_tokens(user_id)
            if tokens['response']:
                tokens = tokens['tokens']
                for topic_id in Connection.Instance().tweetsDB.collection_names():
                    if topic_id == 'counters':
                        continue
                    tweets = list(
                        Connection.Instance().tweetsDB[str(topic_id)].find({
                            'published_at': {'$lte': datetime.now()},
                            'user_id': str(user_id),
                            'twitter_id': tokens[2],
                            'status': 0
                        }))
                    for tweet in tweets:
                        print("Publishing tweet_id: {0} and topic_id: {1}".format(
                            tweet['tweet_id'], topic_id))
                        url = "{0}redirect?topic_id={1}&tweet_id={2}".format(
                            config("HOST_URL"), topic_id, tweet['tweet_id'])
                        publish_tweet(topic_id, tweet, url, tokens[0], tokens[1])
        sleep(300)
def insertEventsIntoDataBase(eventsWithIds, topic_id):
    for event, ids in eventsWithIds:
        # Look for an already stored event with the same id.
        ret = Connection.Instance().events[str(topic_id)].aggregate([
            {'$match': {'id': ids}},
            {'$limit': 1}
        ])
        if ret.alive:
            for elem in ret:
                newEventUpdateTime = datetime.strptime(
                    event.get("updated_date", "0001-01-01T00:00:00")[:19],
                    "%Y-%m-%dT%H:%M:%S")
                oldEventUpdateTime = datetime.strptime(
                    elem.get("updated_date", "0001-01-01T00:00:00")[:19],
                    "%Y-%m-%dT%H:%M:%S")
                if newEventUpdateTime != oldEventUpdateTime:
                    print(newEventUpdateTime)
                    print(oldEventUpdateTime)
                if newEventUpdateTime > oldEventUpdateTime:
                    # The fetched event is newer; replace the stored copy.
                    Connection.Instance().events[str(topic_id)].remove({'id': ids})
                    Connection.Instance().events[str(topic_id)].insert_one(event)
                    print('updated')
                else:
                    print('existing')
        else:
            Connection.Instance().events[str(topic_id)].insert_one(event)
            print('added new')
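# Example of the updated_date handling in insertEventsIntoDataBase(): only the
# first 19 characters of the timestamp are parsed, which drops fractional
# seconds and any timezone suffix.
# >>> datetime.strptime("2019-05-07T12:30:45.123+00:00"[:19], "%Y-%m-%dT%H:%M:%S")
# datetime.datetime(2019, 5, 7, 12, 30, 45)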
def separates_tweet(alertDic, tweet):
    for key in alertDic:
        alert = alertDic[key]
        try:
            if tweet['lang'] in alert['lang']:
                for keyword in alert['keywords']:
                    keyword = re.compile(keyword.replace(" ", "(.?)"),
                                         re.IGNORECASE)
                    if 'extended_tweet' in tweet and 'full_text' in tweet['extended_tweet']:
                        if re.search(keyword,
                                     str(tweet['extended_tweet']['full_text'])):
                            tweet['_id'] = ObjectId()
                            Connection.Instance().db[str(
                                alert['alertid'])].insert_one(tweet)
                            break
                    else:
                        if re.search(keyword, str(tweet['text'])):
                            tweet['_id'] = ObjectId()
                            Connection.Instance().db[str(
                                alert['alertid'])].insert_one(tweet)
                            break
        except KeyError:
            pass
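# Example of the keyword matching in separates_tweet(): a multi-word keyword
# such as "raspberry pi" becomes the pattern "raspberry(.?)pi", so the words
# may be joined directly or separated by any single character.
# >>> pattern = re.compile("raspberry pi".replace(" ", "(.?)"), re.IGNORECASE)
# >>> bool(pattern.search("New Raspberry-Pi case designs"))
# True
# >>> bool(pattern.search("raspberrypi benchmarks"))
# True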
def delete_audience_members(topic_id):
    try:
        audienceNetworksDB['all_audience'].update(
            {'id': {'$in': Connection.Instance().audienceDB[str(topic_id)].distinct('id')}},
            {'$pull': {'topics': int(topic_id)}},
            multi=True
        )
    except:
        audienceNetworksDB['all_audience'].update(
            {},
            {'$pull': {'topics': int(topic_id)}},
            multi=True
        )
    print("Deleted the topic from topics.")
    try:
        result = audienceNetworksDB['all_audience'].delete_many(
            {
                'id': {'$in': Connection.Instance().audienceDB[str(topic_id)].distinct('id')},
                '$where': 'this.topics.length<1'
            }
        )
    except:
        result = audienceNetworksDB['all_audience'].delete_many(
            {'$where': 'this.topics.length<1'}
        )
    print("Deleted " + str(result.deleted_count) +
          " audience members from all_audience_members.")
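# The '$where': 'this.topics.length<1' filter above evaluates JavaScript per
# document; for documents that always carry a 'topics' array, an equivalent
# pure-query filter would use the $size operator, e.g.:
# audienceNetworksDB['all_audience'].delete_many({'topics': {'$size': 0}})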
def getAllAlertList():
    Connection.Instance().cur.execute("Select * from alerts;")
    var = Connection.Instance().cur.fetchall()
    alerts = [{
        'alertid': i[0],
        'name': i[2],
        'keywords': i[3].split(","),
        'lang': i[5].split(","),
        'status': i[6],
        'creationTime': i[7]
    } for i in var]
    alerts = sorted(alerts, key=lambda k: k['alertid'])
    return alerts
def calculateLinks(alertid, date):
    print(alertid, date)
    stringDate = date
    date = determine_date(date)
    links = Connection.Instance().db[str(alertid)].aggregate([
        {'$match': {'timestamp_ms': {'$gte': date}}},
        {'$unwind': "$entities.urls"},
        {'$group': {'_id': "$entities.urls.expanded_url", 'total': {'$sum': 1}}},
        {'$sort': {'total': -1}},
        {'$limit': 500}
    ])
    links = list(links)
    result = []
    while len(result) < 60 and links != []:
        print(len(result))
        link = links.pop(0)
        if link['_id'] is not None:
            try:
                dic = linkParser(link)
                if dic is not None and not next(
                        (item for item in result
                         if item["title"] == dic['title']
                         and item["im"] == dic['im']
                         and item["description"] == dic['description']),
                        False):
                    result.append(dic)
            except:
                pass
    if result != []:
        Connection.Instance().newsdB[str(alertid)].remove({'name': stringDate})
        Connection.Instance().newsdB[str(alertid)].insert_one({
            'name': stringDate,
            stringDate: result,
            'date': strftime("%a, %d %b %Y %H:%M:%S", gmtime())
        })
def calculateLinks(alertid, date):
    print(alertid, date)
    stringDate = date
    date = determine_date(date)
    links = Connection.Instance().db[str(alertid)].aggregate([
        {'$match': {'timestamp_ms': {'$gte': date}}},
        {'$unwind': "$entities.urls"},
        {'$group': {'_id': "$entities.urls.expanded_url", 'total': {'$sum': 1}}},
        {'$sort': {'total': -1}},
        {'$limit': 500}
    ])
    links = list(links)
    result = []
    while len(result) < 60 and links != []:
        link = links.pop(0)
        print(stringDate, len(result))
        if link['_id'] is not None:
            try:
                dic = linkParser(link)
                if dic is not None and dic != {}:
                    result.append(dic)
            except:
                pass
    if result != []:
        Connection.Instance().newsdB[str(alertid)].remove({'name': stringDate})
        Connection.Instance().newsdB[str(alertid)].insert_one({
            'name': stringDate,
            stringDate: result,
            'date': strftime("%a, %d %b %Y %H:%M:%S", gmtime())
        })
def stopAlert(alertid, mainT):
    Connection.Instance().cur.execute(
        "update alerts set isrunning = %s where alertid = %s;",
        [False, alertid])
    Connection.Instance().PostGreSQLConnect.commit()
    alert = getAlertAllOfThemList(alertid)
    mainT.delAlert(alert)
def getFeeds(themename, date, cursor):
    dates = ['all', 'yesterday', 'week', 'month']
    result = {}
    if date not in dates:
        result['Error'] = 'invalid date'
        return json.dumps(result, indent=4)
    date = determine_date(date)
    themeid = str(logic.getAlertId(themename))
    length = len(list(Connection.Instance().db[themeid].aggregate([
        {'$match': {'timestamp_ms': {'$gte': date}}},
        {'$unwind': "$entities.urls"},
        {'$group': {'_id': "$entities.urls.expanded_url", 'total': {'$sum': 1}}}
    ])))
    feeds = Connection.Instance().db[themeid].aggregate([
        {'$match': {'timestamp_ms': {'$gte': date}}},
        {'$unwind': "$entities.urls"},
        {'$group': {'_id': "$entities.urls.expanded_url", 'total': {'$sum': 1}}},
        {'$sort': {'total': -1}},
        {'$skip': cursor},
        {'$limit': 20}
    ])
    feeds = list(feeds)
    last_feeds = []
    if len(feeds) == 0:
        print(len(feeds))
        last_feeds.append("Cursor is Empty.")
    else:
        cursor = int(cursor) + 20
        if cursor >= length:
            cursor = length
        result['next_cursor'] = cursor
        last_feeds = [i['_id'] for i in feeds if i['_id'] is not None]
    result['cursor_length'] = length
    result['feeds'] = last_feeds
    return json.dumps(result, indent=4)
def triggerOneTopic(topic_id, topic_keyword_list, pages, subreddits):
    dates = ["day", "week", "month"]
    print("pages: ", pages)
    print("subreddits: ", subreddits)
    if pages is not None and len(pages) and pages[0] is not None and pages[0] != "":
        searchFacebookNews(topic_id, pages)
    if subreddits is not None and len(subreddits) and subreddits[0] is not None and subreddits[0] != "":
        searchSubredditNews(topic_id, subreddits)
    for date in dates:
        posts = []
        if subreddits is not None and len(subreddits) and subreddits[0] is not None and subreddits[0] != "":
            posts.extend(mineRedditConversation(subreddits, False, date))
        if pages is not None and len(pages) and pages[0] is not None and pages[0] != "":
            posts.extend(mineFacebookConversations(pages, False, date))
        if len(posts) != 0:
            posts = sorted(posts, key=lambda k: k["numberOfComments"], reverse=True)
            Connection.Instance().conversations[str(topic_id)].remove(
                {"time_filter": date})
            Connection.Instance().conversations[str(topic_id)].insert_one({
                'time_filter': date,
                'posts': posts
            })
def compress_audience_data():
    # remove unwanted fields from all objects
    print("Compressing audience data...")
    Connection.Instance().audienceDB['all_audience'].update({}, {
        '$unset': {
            "profile_background_color": 1,
            "default_profile_image": 1,
            "contributors_enabled": 1,
            "profile_sidebar_border_color": 1,
            "profile_use_background_image": 1,
            "profile_background_image_url": 1,
            "protected": 1,
            "translator_type": 1,
            "notifications": 1,
            "following": 1,
            "default_profile": 1,
            "is_translator": 1,
            "has_extended_profile": 1,
            "profile_image_url": 1,
            "timezone": 1,
            "follow_request_sent": 1,
            "profile_background_tile": 1,
            "is_translation_enabled": 1,
            "status": 1,
            "profile_text_color": 1,
            "profile_sidebar_fill_color": 1,
            "profile_link_color": 1
        }
    }, multi=True)
    Connection.Instance().audienceDB.command({'compact': 'all_audience'})
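# Note: collection.update(..., multi=True) above is the legacy PyMongo call;
# on PyMongo 3+ the same bulk field removal can be written with update_many.
# A minimal sketch assuming the same Connection singleton (field list shortened):
def compress_audience_data_update_many():
    Connection.Instance().audienceDB['all_audience'].update_many(
        {}, {'$unset': {"profile_background_color": 1, "default_profile_image": 1}})
    Connection.Instance().audienceDB.command({'compact': 'all_audience'})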
def getFeeds(themename, cursor=0):
    length = len(list(
        Connection.Instance().feedDB[str(themename)].find({}, {"_id": 0})))
    if cursor is None:
        feeds = Connection.Instance().feedDB[str(themename)].find(
            {}, {"_id": 0}).skip(0).limit(20)
        cursor = 0
    else:
        feeds = Connection.Instance().feedDB[str(themename)].find(
            {}, {"_id": 0}).skip(int(cursor)).limit(20)
    result = {}
    feeds = list(feeds)
    if len(feeds) == 0:
        feeds.append("Cursor is Empty.")
    else:
        cursor = int(cursor) + 20
        if cursor >= length:
            cursor = length
        result['next cursor'] = cursor
    result['cursor length'] = length
    result['feeds'] = feeds
    return json.dumps(result, indent=4)
def update_influencer_score():
    for collection_name in Connection.Instance().audience_samples_DB.collection_names():
        collection = Connection.Instance().audience_samples_DB[collection_name]
        start = time()
        print("Currently updating collection " + collection_name)
        topicID = int(collection_name.split("_")[1])
        keywords = fetchKeywords(topicID, keyword_size)
        if not keywords:
            print("!!!Error fetching keywords!!!")
            continue
        bulk = collection.initialize_unordered_bulk_op()
        # Calculate score every time
        cursor = collection.find()
        for record in cursor:
            bulk.find({'_id': record['_id']}).update(
                {'$set': {"influencer_score": calculateScore(record, keywords)}})
        print("...Executing bulk operation")
        try:
            bulk.execute()
        except pymongo.errors.InvalidOperation as e:
            print("..." + str(e))
        end = time()
        print("...It took {} seconds".format(end - start))
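# The initialize_unordered_bulk_op() API used above was removed in PyMongo 4;
# an equivalent scoring pass can be written with bulk_write and UpdateOne.
# A minimal sketch, assuming the same calculateScore() helper and keyword list:
from pymongo import UpdateOne

def update_scores_with_bulk_write(collection, keywords):
    requests = [
        UpdateOne({'_id': record['_id']},
                  {'$set': {'influencer_score': calculateScore(record, keywords)}})
        for record in collection.find()
    ]
    if requests:
        collection.bulk_write(requests, ordered=False)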
def checkTweets(alertid, newestId):
    if int(newestId) == -1:
        tweets = Connection.Instance().db[str(alertid)].find({}, {
            'tweetDBId': 1,
            "text": 1,
            "id": 1,
            "user": 1,
            'created_at': 1,
            "_id": 0
        }).sort([('tweetDBId', pymongo.DESCENDING)])
    else:
        tweets = Connection.Instance().db[str(alertid)].find(
            {'tweetDBId': {'$gt': int(newestId)}},
            {
                'tweetDBId': 1,
                "text": 1,
                "user": 1,
                'created_at': 1,
                "_id": 0
            }).sort([('tweetDBId', pymongo.DESCENDING)])
    tweets = list(tweets)
    return len(tweets)
def startAlert(alertid, mainT):
    alert = getAlertAllOfThemList(alertid)
    Connection.Instance().cur.execute(
        "update alerts set isrunning = %s where alertid = %s;",
        [True, alert['alertid']])
    Connection.Instance().PostGreSQLConnect.commit()
    mainT.addAlert(alert)
def separates_tweet(alertDic, tweet):
    try:
        for key in alertDic:
            alert = alertDic[key]
            if tweet['lang'] in alert['lang']:
                for keyword in alert['keywords']:
                    keyword = re.compile(keyword.replace(" ", "(.?)"),
                                         re.IGNORECASE)
                    tweet['tweetDBId'] = get_next_tweets_sequence()
                    if 'extended_tweet' in tweet and 'full_text' in tweet['extended_tweet']:
                        if re.search(keyword,
                                     str(tweet['extended_tweet']['full_text'])):
                            updatedTime = datetime.fromtimestamp(
                                int(tweet['timestamp_ms']) / 1e3)
                            with Connection.Instance().get_cursor() as cur:
                                sql = ("UPDATE topics "
                                       "SET last_tweet_date = %s "
                                       "WHERE topic_id = %s")
                                cur.execute(sql, [updatedTime, alert['alertid']])
                            tweet['_id'] = ObjectId()
                            if tweet['entities']['urls'] != []:
                                tweet['redis'] = False
                            else:
                                tweet['redis'] = True
                            Connection.Instance().db[str(
                                alert['alertid'])].insert_one(tweet)
                            break
                    else:
                        if re.search(keyword, str(tweet['text'])):
                            updatedTime = datetime.fromtimestamp(
                                int(tweet['timestamp_ms']) / 1e3)
                            with Connection.Instance().get_cursor() as cur:
                                sql = ("UPDATE topics "
                                       "SET last_tweet_date = %s "
                                       "WHERE topic_id = %s")
                                cur.execute(sql, [updatedTime, alert['alertid']])
                            tweet['_id'] = ObjectId()
                            if (tweet['entities']['urls'] == [] or
                                    tweet['entities']['urls'][0]['expanded_url'] is None):
                                tweet['redis'] = True
                            else:
                                tweet['redis'] = False
                            Connection.Instance().db[str(
                                alert['alertid'])].insert_one(tweet)
                            break
    except Exception as e:
        f = open('../log.txt', 'a+')
        s = '\n\n tweet lang: ' + tweet['lang']
        f.write(s)
        f.write('\n')
        f.write(str(e))
        f.write('\n\n')
        f.close()
        pass
def deleteAlert(alertid, mainT, userid):
    alert = getAlertAllOfThemList(alertid)
    setUserAlertLimit(userid, 'increment')
    mainT.delAlert(alert)
    Connection.Instance().db[str(alertid)].drop()
    Connection.Instance().cur.execute("delete from alerts where alertid = %s;",
                                      [alertid])
    Connection.Instance().PostGreSQLConnect.commit()
def alertExist(alertid):
    Connection.Instance().cur.execute(
        "Select userid from alerts where alertid = %s;", [alertid])
    var = Connection.Instance().cur.fetchone()
    if var is not None:
        return True
    else:
        return False
def getThemes(userid):
    Connection.Instance().cur.execute(
        "select alertid, alertname from alerts where userid = %s", [userid])
    var = Connection.Instance().cur.fetchall()
    themes = [{'alertid': i[0], 'name': i[1]} for i in var]
    result = {}
    result['themes'] = themes
    return json.dumps(result, indent=4)
def main():
    Connection.Instance().cur.execute("Select alertid from alerts;")
    alertid_list = sorted(list(Connection.Instance().cur.fetchall()))
    parameters = createParameters(alertid_list)
    pool = ThreadPool(3)
    pool.map(calculateLinks, parameters)
    pool.wait_completion()
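# One possible ThreadPool implementation matching the map()/wait_completion()
# calls in main() above; this is the familiar Queue-plus-daemon-worker recipe
# and an assumption, not necessarily the project's original class.
from queue import Queue
from threading import Thread

class Worker(Thread):
    """Pull (func, args) tasks from the shared queue and run them."""
    def __init__(self, tasks):
        Thread.__init__(self)
        self.tasks = tasks
        self.daemon = True
        self.start()

    def run(self):
        while True:
            func, args = self.tasks.get()
            try:
                func(*args)
            except Exception as e:
                print(e)
            finally:
                self.tasks.task_done()

class ThreadPool:
    """Fixed-size pool of daemon worker threads."""
    def __init__(self, num_threads):
        self.tasks = Queue(num_threads)
        for _ in range(num_threads):
            Worker(self.tasks)

    def map(self, func, args_list):
        # args_list holds argument tuples, e.g. (alertid, date) pairs
        # produced by createParameters().
        for args in args_list:
            self.tasks.put((func, args))

    def wait_completion(self):
        self.tasks.join()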
def checkUserIdAlertId(userid, alertid):
    Connection.Instance().cur.execute(
        "Select userid from alerts where alertid = %s;", [alertid])
    var = Connection.Instance().cur.fetchone()
    if var is not None and len(var) != 0:
        return int(var[0]) == int(userid)
    else:
        return False
def get_local_influencers():
    # Sort each audience sample and write the top 20 to the influencers database.
    for collection_name in Connection.Instance().audience_samples_DB.collection_names():
        collection = Connection.Instance().audience_samples_DB[collection_name]
        influencers = list(
            collection.find({}).sort([("influencer_score", -1)]).limit(20))
        location, topic = collection_name.split("_")
        local_influencers_collection = Connection.Instance(
        ).local_influencers_DB[topic + "_" + location]
        local_influencers_collection.drop()
        local_influencers_collection.insert_many(influencers)
def getNextAlertId():
    Connection.Instance().cur.execute(
        "select alertid from alerts order by alertid desc limit 1;")
    rows = Connection.Instance().cur.fetchall()
    if len(rows) == 0:
        return 0
    else:
        for temp in rows:
            return temp[0] + 1
def updateAlert(alert, mainT, userid):
    Connection.Instance().cur.execute(
        "update alerts set userid = %s, keywords = %s, languages = %s, "
        "isrunning = %s, description = %s where alertid = %s;",
        [
            userid, alert['keywords'], alert['lang'], True,
            alert['description'], alert['alertid']
        ])
    Connection.Instance().PostGreSQLConnect.commit()
    alert = getAlertAllOfThemList(alert['alertid'])
    mainT.updateAlert(alert)
def getAllRunningAlertList():
    Connection.Instance().cur.execute(
        "Select * from alerts where isrunning = %s;", [True])
    var = Connection.Instance().cur.fetchall()
    alerts = [{
        'alertid': i[0],
        'name': i[2],
        'keywords': i[3].split(","),
        'lang': i[5].split(",")
    } for i in var]
    return alerts
def getAlertList(userid):
    Connection.Instance().cur.execute(
        "Select * from alerts where userid = %s;", [userid])
    var = Connection.Instance().cur.fetchall()
    alerts = [{
        'alertid': i[0],
        'name': i[2],
        'keywords': i[3].split(","),
        'lang': i[5].split(","),
        'status': i[6],
        'creationTime': i[7]
    } for i in var]
    alerts = sorted(alerts, key=lambda k: k['alertid'])
    for alert in alerts:
        alert['tweetCount'] = Connection.Instance().db[str(
            alert['alertid'])].find().count()
    return alerts
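# Note: the cursor.count() call in getAlertList() is deprecated in newer
# PyMongo releases; the same per-alert tweet count can be obtained with
# count_documents. A sketch, assuming the same Connection singleton:
def count_alert_tweets(alertid):
    return Connection.Instance().db[str(alertid)].count_documents({})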
def setUserAlertLimit(userid, setType):
    Connection.Instance().cur.execute(
        "select alertlimit from users where userid = %s", [userid])
    fetched = Connection.Instance().cur.fetchall()
    if setType == 'decrement':
        newLimit = fetched[0][0] - 1
    elif setType == 'increment':
        newLimit = fetched[0][0] + 1
    Connection.Instance().cur.execute(
        "update users set alertlimit = %s where userid = %s",
        [newLimit, userid])
    Connection.Instance().PostGreSQLConnect.commit()