def take_actions(self):
    """Let the operator choose an action for the most recently selected user.

    Renders a multiselect over ``self.users``; the last selected screen name
    becomes the active user. Its stored ``activity`` is shown, followed by two
    radio groups (action and core-user flag). Pressing *Done* writes both
    values to the user's document in the ``users`` collection and echoes the
    stored result.

    Empty selection and a missing database record are handled explicitly
    instead of through a catch-all ``IndexError`` handler, so a missing record
    is no longer reported as "nothing selected".
    """
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    done = st.multiselect('Select Users to take action', list(self.users))
    if not done:
        st.write('Start selecting users, After pressing Discover users button')
        return

    user = done[-1]
    url = f'[{user}](https://twitter.com/{user})'
    st.write('@' + url +
             ' is the selected user, Please fill the details below')

    record = mongo_app.db.users.find_one({'screen_name': user})
    if record is None:
        st.write('@' + url + ' was not found among the discovered users')
        return
    st.write('The number of interactions with @' + url + ' is : ' +
             str(record['activity']))

    # ``None`` is the pre-selected option so that an untouched radio group can
    # be told apart from a deliberate choice.
    status = st.radio('What action do you wan to take on @' + user,
                      [None, 'Discover', 'Mute', 'Block'],
                      index=0)
    core_user = st.radio('Do you want @' + user + ' to be Core user ?',
                         [None, 'Yes', 'No'],
                         index=0)

    if not st.button('Done'):
        return
    if status is None or core_user is None:
        st.write('Enter above details properly')
        st.stop()
        return

    with st.spinner('Working'):
        mongo_app.db.users.update_one(
            {'screen_name': user},
            {'$set': {
                'status': status,
                'core_user': core_user
            }})
        # Read the document back so the confirmation shows what is stored.
        member = mongo_app.db.users.find_one({'screen_name': user})
    if member is None:
        st.write('@' + url + ' was not found among the discovered users')
        return
    st.success(member['screen_name'] + ', ' + member['status'] + ', ' +
               member['core_user'])
def make_new_user(self, user_json, api):
    """Fill this user from Twitter and mark its staging record as made.

    Args:
        user_json: Staging document; ``screen_name``, ``core_user``,
            ``created_day``, ``created_year`` and ``_id`` are read from it.
        api: Twitter client exposing ``get_user(id=...)``.
    """
    profile = api.get_user(id=user_json['screen_name'])._json

    # Copy the identifying fields from the Twitter profile.
    for field in ('screen_name', 'id', 'id_str', 'name'):
        self.user[field] = profile[field]

    self.core_user = user_json['core_user'] == 'Yes'
    self.day_added = user_json['created_day']
    self.year_added = user_json['created_year']
    self.bond_stats = None

    db_client = connections.mongo()
    db_client.connect_to_mongo()
    made_flag = {'$set': {'made_user': True}}
    db_client.db.users.update({'_id': user_json['_id']}, made_flag)
def delete_user(self):
    """Render the "Delete User" section and delete on button press.

    When *Delete* is pressed, the entered screen name is looked up in the
    ``community`` collection. If present, the matching community documents and
    that user's tweets are removed; otherwise a notice is written and the
    script run is stopped.
    """
    for line in ('---', '## Delete User',
                 'To delete a user, Please enter the details below'):
        st.write(line)
    screen_name = str(st.text_input('Enter screen_name of user'))

    db_client = connections.mongo()
    db_client.connect_to_mongo()

    if not st.button('Delete'):
        return

    found = db_client.db.community.find({
        'user.screen_name': screen_name
    }).count()
    if found != 0:
        db_client.db.community.remove({'user.screen_name': screen_name})
        db_client.db.tweets.remove({'user.screen_name': screen_name})
        st.write(f'@{screen_name} is deleted')
    else:
        st.write(f'@{screen_name} is not their 😏')
        st.stop()
def index():
    """Dispatch one ``tasks.discover`` job per cluster and wait for them.

    The cluster count is read from the ``cluster_community`` report. One task
    is queued per cluster number with a random pause of up to three seconds
    after each submission, then the handler polls until every task has
    finished.

    The original wait loop used ``==`` where ``!=`` was needed: it returned
    without waiting whenever there were clusters, and spun forever when there
    were none. Waiting now uses ``ready()``, so a failed task also ends the
    wait instead of blocking it. A missing report is treated as zero clusters
    (previously an unbound-variable error).

    Returns:
        The string ``'done'``.
    """
    # The body is unused; parsing is kept so a non-JSON request still fails
    # the way it did before.
    dict(request.get_json())

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    report = mongo_app.db.reports.find_one({'report': 'cluster_community'})
    no_clusters = report['no_clusters'] if report is not None else 0

    results = []
    for cluster_number in range(no_clusters):
        results.append(
            tasks.discover.delay({'cluster_number': cluster_number}))
        # Random pause between submissions (presumably to stagger the
        # workers' start -- TODO confirm).
        time.sleep(random.uniform(0, 3))

    # NOTE(review): this relies on a configured result backend; without one
    # the tasks never report as ready.
    while not all(result.ready() for result in results):
        time.sleep(1)
    return 'done'
def index():
    """Cluster the community graph with Infomap and persist the result.

    Builds a directed graph with one vertex per community user (vertex
    ``size`` is the user's ``activity``) and one edge per non-zero ``bond``,
    runs ``community_infomap`` and writes each vertex's ``cluster_number``
    back to the ``community`` collection. The ``cluster_community`` report is
    then created or updated with the user and cluster counts.

    Compared with the original, a graph without edges or without vertices no
    longer fails on the missing ``weight`` / ``size`` attribute, and the
    report is written with a single upsert instead of count-then-write.

    Returns:
        The string ``'done'``.
    """
    # The body is unused; parsing is kept so a non-JSON request still fails
    # the way it did before.
    dict(request.get_json())

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    all_users = user_objects.user_set()
    all_users.get_users({})
    users = all_users.users

    g = ig.Graph(directed=True)
    for user in users:
        g.add_vertex(name=user.user['screen_name'], size=user.activity)

    has_edges = False
    for user in users:
        for edge in user.edges:
            if edge['bond'] != 0:
                g.add_edge(user.user['screen_name'],
                           edge['screen_name'],
                           weight=edge['bond'])
                has_edges = True

    cluster_number = 0
    if users:
        # The 'weight' edge attribute only exists once an edge was added.
        edge_weights = g.es['weight'] if has_edges else None
        cluster = g.community_infomap(edge_weights=edge_weights,
                                      vertex_weights=g.vs['size'],
                                      trials=1000)
        for graph in cluster.subgraphs():
            for vertex in graph.vs:
                mongo_app.db.community.update_one(
                    {'user.screen_name': vertex['name']},
                    {'$set': {
                        'cluster_number': cluster_number
                    }})
            cluster_number += 1

    mongo_app.db.reports.update_one(
        {'report': 'cluster_community'},
        {'$set': {
            'no_users': len(users),
            'no_clusters': cluster_number
        }},
        upsert=True)
    return ('done')
def get_tweets(self, query):
    """Append a ``Tweet`` to ``self.tweets`` for every stored tweet matching
    ``query``.

    Args:
        query: Mongo filter applied to the ``tweets`` collection.
    """
    db_client = connections.mongo()
    db_client.connect_to_mongo()
    cursor = db_client.db.tweets.find(query)
    for raw_tweet in cursor:
        self.tweets.append(Tweet(raw_tweet))
def make_new_users(self, query):
    """Create ``User`` objects for the staged users selected by ``query``.

    Staged documents are first pruned, then those matching ``status``,
    ``created_year``, ``created_day`` and ``made_user`` from ``query`` are
    loaded. The first one is looked up on Twitter as a connectivity check
    before every match is turned into a ``User`` and appended to
    ``self.users``.

    Compared with the original: an empty match set returns early (it used to
    end in ``AttributeError`` because the ``IndexError`` from ``users[0]`` was
    caught and then asked for ``.reason``), and exceptions that do not carry a
    ``reason`` are re-raised unchanged instead of being masked.

    Args:
        query: Dict with ``status``, ``created_year``, ``created_day`` and
            ``made_user``.
    """
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    # NOTE(review): this only deletes documents whose year AND day both
    # differ from the query; same-year documents from another day are kept.
    # Confirm whether ``$or`` was intended.
    mongo_app.db.users.delete_many({
        '$and': [{
            'created_year': {
                '$ne': query['created_year']
            }
        }, {
            'created_day': {
                '$ne': query['created_day']
            }
        }]
    })

    users = list(
        mongo_app.db.users.find({
            '$and': [{
                'status': query['status']
            }, {
                'created_year': query['created_year']
            }, {
                'created_day': query['created_day']
            }, {
                'made_user': query['made_user']
            }]
        }))
    if not users:
        return

    keys = connections.twitter_api_keys()
    keys.get_existing_keys(no_of_keys=1)
    api = keys.keys[0].connect_to_twitter()

    # Probe Twitter once so network / unknown-user problems are reported
    # before any user is built.
    try:
        api.get_user(id=users[0]['screen_name'])
    except Exception as e:
        if not hasattr(e, 'reason'):
            raise
        if e.reason[:22] == 'Failed to send request':
            st.write('No Internet 😴')
            st.stop()
        if getattr(e, 'api_code', None) == 50:
            st.write('User not found, please check the name properly 🥱')
            st.stop()
        # Any other Twitter error falls through, as before; it will surface
        # again from ``make_new_user`` if it persists.

    for member in users:
        new_user = User()
        new_user.make_new_user(member, api)
        self.users.append(new_user)
def delete(self):
    """Render the "Delete API APP keys" section and delete the named app.

    All ``auth`` documents whose ``app_name`` equals the entered text are
    deleted. Nothing is sent to the database while the input is empty; the
    original issued a delete for the empty name on every render.
    """
    st.write('---')
    st.write('## Delete API APP keys')
    app_name = str(st.text_input('Enter API APP name'))
    if not app_name:
        return

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()
    mongo_app.db.auth.delete_many({'app_name': app_name})
def get_users(self, query):
    """Load community members matching ``query`` into this set.

    For each matching ``community`` document a ``User`` is built with
    ``make_user`` and appended to ``self.users``; the document's ``_id`` is
    appended to ``self.user_ids`` at the same position.

    Args:
        query: Mongo filter applied to the ``community`` collection.
    """
    db_client = connections.mongo()
    db_client.connect_to_mongo()
    for document in db_client.db.community.find(query):
        member = User()
        member.make_user(document)
        self.users.append(member)
        self.user_ids.append(document['_id'])
def push_to_community(self):
    """Insert every user of this set into ``community`` unless a document
    with the same ``user`` sub-document is already stored."""
    db_client = connections.mongo()
    db_client.connect_to_mongo()
    for member in self.users:
        document = member.make_user_json()
        already_stored = db_client.db.community.find({
            'user': document['user']
        }).count() > 0
        if already_stored:
            continue
        db_client.db.community.insert(document)
def get_mute_users():
    """Return the ``screen_name`` of every document in the ``mute`` collection.

    Reading is best effort: if the query or a document fails, the names
    collected so far are returned. The failure is now printed instead of being
    swallowed by a bare ``except``, which also hid ``KeyboardInterrupt`` and
    ``SystemExit``.

    Returns:
        List of screen names (possibly empty).
    """
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()
    mute_users = []
    try:
        for user in mongo_app.db.mute.find({}):
            mute_users.append(user['screen_name'])
    except Exception as e:
        print('Could not read all muted users:', e)
    return mute_users
def is_auth_added(self):
    """Validate and store the entered Twitter API key on button press.

    After ``is_correct`` / ``is_repeated`` pass, the key is tested by fetching
    the ``Twitter`` account. Rate limiting, network failure and any other
    error each write a message and stop the script run; on success the key
    document is inserted into ``auth``.

    The error branch no longer assumes every exception has a ``reason``
    attribute; exceptions without one previously raised ``AttributeError``
    and now get the generic "mistake in the keys" message.
    """
    if not st.button('Add API Keys'):
        return

    correct = self.is_correct()
    repeated = self.is_repeated()
    if repeated or not correct:
        st.write(
            'Please enter details properly, probabably you are missing to fill something or are trying to add same key again, if nothing solves try deleting the existing api app and add new one'
        )
        st.stop()
        return

    key_json = self.key.make_json()
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    # Try the key against Twitter before storing it.
    api = self.key.connect_to_twitter_no_wait()
    try:
        api.get_user(id='Twitter')
    except tweepy.RateLimitError:
        st.write('Try after 15 mins as we have exceded twitter rate limit')
        st.stop()
    except Exception as e:
        reason = str(getattr(e, 'reason', ''))
        if reason[:22] == 'Failed to send request':
            st.write('No Internet 🥱')
            st.stop()
        else:
            st.write('There may be some mistake in the keys entered 😭')
            st.stop()

    mongo_app.db.auth.insert_one(key_json)
    st.write('**To Delete key, Use Mongo Compass.**')
    msg = 'The Twitter API Key is Added, Now you can collect more Data efficiently'
    st.success(msg)
def index():
    """Age the mute list and move newly muted/blocked users onto it.

    Each call:

    1. decrements ``activity`` of every ``mute`` entry with status ``Mute``
       and deletes the entries that reached zero (best effort);
    2. copies every not-yet-made ``users`` document with status ``Mute`` or
       ``Block`` into ``mute`` with ``activity`` 7 and flags the original as
       ``made_user``.

    The flag is now written with ``$set``. The original passed
    ``{'made_user': True}`` as the update document, which replaces the whole
    user document and drops its other fields.

    Returns:
        The string ``'done'``.
    """
    # The body is unused; parsing is kept so a non-JSON request still fails
    # the way it did before.
    dict(request.get_json())

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    try:
        mongo_app.db.mute.update_many({'status': 'Mute'},
                                      {'$inc': {
                                          'activity': -1
                                      }})
        # ``$lte`` also clears entries that slipped below zero after an
        # earlier failed clean-up.
        mongo_app.db.mute.delete_many({
            '$and': [{
                'status': 'Mute'
            }, {
                'activity': {
                    '$lte': 0
                }
            }]
        })
    except Exception as e:
        # Still best effort, but no longer silent.
        print('Could not age the mute list:', e)

    query = {
        '$and': [{
            'made_user': False
        }, {
            '$or': [{
                'status': 'Mute'
            }, {
                'status': 'Block'
            }]
        }]
    }
    # Materialise first so the collection is not modified while iterating.
    mute_users = list(mongo_app.db.users.find(query))

    for user in mute_users:
        # Only 'Mute' entries are decremented above, once per call.
        user['activity'] = 7
        mongo_app.db.users.update_one({'_id': user['_id']},
                                      {'$set': {
                                          'made_user': True
                                      }})
        user.pop('_id')
        mongo_app.db.mute.insert_one(user)
    return ('done')
def get_users(self):
    """Append to ``self.users`` the screen names of staged users created
    today (same day-of-year and year) that have not been made yet."""
    db_client = connections.mongo()
    db_client.connect_to_mongo()

    day_of_year = int(datetime.datetime.now().strftime('%j'))
    year = int(datetime.datetime.now().year)
    todays_unmade = {
        '$and': [
            {'created_day': day_of_year},
            {'created_year': year},
            {'made_user': False},
        ]
    }
    for document in db_client.db.users.find(todays_unmade):
        self.users.append(document['screen_name'])
def index():
    """Dispatch one ``tasks.process`` job per community user and wait for them.

    Every job receives the user's screen name and the five weights stored in
    the ``preprocess`` report. Submissions are separated by a random pause of
    up to two seconds, then the handler polls until every task has finished.

    The original wait loop used ``==`` where ``!=`` was needed: it returned
    without waiting whenever there were users, and spun forever when there
    were none. Waiting now uses ``ready()``, so a failed task also ends the
    wait instead of blocking it.

    Returns:
        The string ``'done'``.

    Raises:
        LookupError: There are users to process but no ``preprocess`` report
            (previously an ``IndexError`` from ``reports[0]``).
    """
    # The body is unused; parsing is kept so a non-JSON request still fails
    # the way it did before.
    dict(request.get_json())

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    all_users = user_objects.user_set()
    all_users.get_users({})
    if not all_users.users:
        return 'done'

    report = mongo_app.db.reports.find_one({'report': 'preprocess'})
    if report is None:
        raise LookupError(
            "no 'preprocess' report found; save the weights first")
    weights = {
        name: report[name]
        for name in ('tweet_weight', 'tweet_mention_weight',
                     'retweet_weight', 'quote_weight', 'reply_weight')
    }

    results = []
    for user in all_users.users:
        payload = {'user': user.user['screen_name'], 'weights': weights}
        results.append(tasks.process.delay(payload))
        # Random pause between submissions (presumably to stagger the
        # workers' start -- TODO confirm).
        time.sleep(random.uniform(0, 2))

    # NOTE(review): this relies on a configured result backend; without one
    # the tasks never report as ready.
    while not all(result.ready() for result in results):
        time.sleep(1)
    return 'done'
def show_preview(self):
    """Render the "Show preview" section; on button press, display every
    document of the ``users`` collection as a table."""
    for line in ('---', '## Show preview',
                 'See what all actions have been taken on the above people.'):
        st.write(line)

    db_client = connections.mongo()
    db_client.connect_to_mongo()

    if not st.button('Show preview'):
        return

    shown_columns = [
        'screen_name', 'core_user', 'status', 'activity', 'created_year',
        'created_day', 'made_user'
    ]
    records = list(db_client.db.users.find({}))
    st.dataframe(pd.DataFrame(records, columns=shown_columns))
def show_report(self):
    """Show the user and cluster counts from the ``cluster_community`` report.

    When a report exists, both counts are written to the page and
    ``self.no_clusters`` is set from it; otherwise nothing happens.
    """
    db_client = connections.mongo()
    db_client.connect_to_mongo()

    found = list(db_client.db.reports.find({'report': 'cluster_community'}))
    if not found:
        return

    summary = found[0]
    st.write(
        f"The present no of Users in our community are : {summary['no_users']}"
    )
    st.write(
        f"The number of **Clusters** detected are : {summary['no_clusters']}"
    )
    self.no_clusters = summary['no_clusters']
def is_user_added(self):
    """Validate the entered user on button press and stage it in ``users``.

    The form fields must be filled, and the screen name is looked up on
    Twitter. Network failure and "user not found" (API code 50) each write a
    message and stop the script run. Otherwise the document from
    ``make_json`` is inserted and a confirmation is shown.

    Exceptions that do not carry a ``reason`` (anything that is not a Twitter
    API error, e.g. no API key configured) are re-raised unchanged; they used
    to be masked by an ``AttributeError`` from the handler itself.
    """
    if not st.button('Done'):
        return

    if (self.screen_name is None or self.core_user is None
            or self.status is None or self.screen_name == ''):
        st.write('Please enter details properly 🎅👵')
        st.stop()
        return

    try:
        keys = connections.twitter_api_keys()
        keys.get_existing_keys(no_of_keys=1)
        api = keys.keys[0].connect_to_twitter()
        api.get_user(id=self.screen_name)
    except Exception as e:
        if not hasattr(e, 'reason'):
            raise
        if e.reason[:22] == 'Failed to send request':
            st.write('No Internet 😴')
            st.stop()
        if getattr(e, 'api_code', None) == 50:
            st.write('User not found, please check the name properly 🥱')
            st.stop()
        # NOTE(review): other Twitter errors fall through and the user is
        # still staged, as before -- confirm this is intended.

    user = self.make_json()
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()
    mongo_app.db.users.insert_one(user)
    msg = f'@{self.screen_name} is added, Is Core user : {self.core_user}, @{self.screen_name} will be {self.status}, date is {self.created_day}, Add the users and press on Make users button to add users sofar'
    st.success(msg)
def process(self, query):
    """Recompute one user's activity and bonds to every other community user.

    An edge is created towards each other user, flagged ``is_follower`` when
    this user's id appears in the other user's ``friends_id``. The user's
    tweets of the last seven days are loaded and preprocessed; for each tweet,
    every follower edge gains the base weight and every edge whose screen name
    is mentioned gains the weight of the tweet's type. ``activity`` (tweet
    count) and ``edges`` are then stored on the user's ``community`` document.

    Args:
        query: Dict with ``user`` (screen name) and ``weights`` (the five
            ``*_weight`` values).
    """
    weights = query['weights']

    matched = user_objects.user_set()
    matched.get_users({'user.screen_name': query['user']})
    user = matched.users[0]

    all_users = user_objects.user_set()
    all_users.get_users({})

    # One zero-strength edge towards every other community member.
    user.edges = []
    for other in all_users.users:
        if user.user['screen_name'] == other.user['screen_name']:
            continue
        edge = {
            'id': other.user['id'],
            'screen_name': other.user['screen_name'],
        }
        edge['is_follower'] = any(friend['id'] == user.user['id']
                                  for friend in other.friends_id)
        edge['bond'] = 0
        user.edges.append(edge)

    tweets = tweet_objects.tweet_set()

    # Window: after the day seven days back, up to and including today.
    today = datetime.date.today()
    seven_days_back = today - datetime.timedelta(days=7)

    def authored_and(*clauses):
        return {'$and': [{'user.id': user.user['id']}, *clauses]}

    def after_window_start():
        return {
            '$and': [{
                'created_day': {
                    '$gt': int(seven_days_back.strftime('%j'))
                }
            }, {
                'created_year': seven_days_back.year
            }]
        }

    def up_to_today():
        return {
            '$and': [{
                'created_day': {
                    '$lte': int(today.strftime('%j'))
                }
            }, {
                'created_year': today.year
            }]
        }

    if seven_days_back.year == today.year:
        tweets.get_tweets(authored_and(after_window_start(), up_to_today()))
    else:
        # The window straddles New Year: query each year separately.
        tweets.get_tweets(authored_and(after_window_start()))
        tweets.get_tweets(authored_and(up_to_today()))

    tweets.preprocess_tweets()
    user.activity = len(tweets.tweets)

    now_weights = {
        'base': weights['tweet_weight'],
        'tweet': weights['tweet_mention_weight'],
        'retweet': weights['retweet_weight'],
        'quote': weights['quote_weight'],
        'reply': weights['reply_weight'],
    }
    for tweet in tweets.tweets:
        for edge in user.edges:
            if edge['is_follower']:
                edge['bond'] += now_weights['base']
            if edge['screen_name'] in tweet.tweet_mentions:
                edge['bond'] += now_weights[tweet.tweet_type]

    db_client = connections.mongo()
    db_client.connect_to_mongo()
    db_client.db.community.update(
        {'user.screen_name': user.user['screen_name']},
        {'$set': {
            'activity': user.activity,
            'edges': user.edges
        }})
def status(self):
    """Render the schedule slider and the data-collection status summary.

    Pressing *Done* stores the chosen hour: the ``data_collection`` report is
    created with its initial fields when absent, otherwise only
    ``scheduled_hour`` is updated. Below that, the current report is loaded
    into ``self.query`` and summarised; any error while building the summary
    is ignored.
    """
    db_client = connections.mongo()
    db_client.connect_to_mongo()

    hour = st.slider(
        label=
        'When do you want to schedule this task every day ?(Please mention in UTC timezone)',
        min_value=0,
        max_value=23,
        value=8,
        step=1)

    if st.button('Done'):
        existing = list(
            db_client.db.reports.find({'report': 'data_collection'}))
        if existing:
            db_client.db.reports.update({'report': 'data_collection'},
                                        {'$set': {
                                            'scheduled_hour': hour
                                        }})
        else:
            today = datetime.date.today()
            midnight = datetime.datetime(today.year, today.month, today.day,
                                         0, 0, 0)
            week_ago = midnight - datetime.timedelta(days=7)

            def per_source(value):
                # Same initial value for each of the three sources.
                return {
                    'scheduler': value,
                    'add': value,
                    'discover': value
                }

            db_client.db.reports.insert({
                'report': 'data_collection',
                'scheduled_hour': hour,
                'last_day': per_source(int(week_ago.strftime('%j'))),
                'year': per_source(int(week_ago.year)),
                'tweets_status': per_source('collected'),
                'user_friends_status': per_source('collected'),
                'tweets_user': per_source(''),
                'friends_user': {
                    'screen_name': '',
                    'user_no': 0,
                    'type': 'old'
                },
                'duration': per_source(''),
                'quantity': per_source(0)
            })
        st.success(f'Changed to {hour} hour')

    try:
        self.query = list(
            db_client.db.reports.find({'report': 'data_collection'}))[0]
        downloaded_today = {
            '$and': [{
                'downloaded_day_year':
                int(datetime.datetime.today().strftime('%j'))
            }, {
                'downloaded_year': datetime.datetime.today().year
            }]
        }
        count = db_client.db.tweets.find(downloaded_today).count()

        st.write(f"Scheduled hour is :{self.query['scheduled_hour']}.")
        st.write(f"Live count of number of **tweets downloaded** : {count}.")
        st.write(
            f"The User whose friends are being collected '{self.query['friends_user']['screen_name']}'."
        )
        st.write(
            f"Today's day of year is : {int(datetime.datetime.today().strftime('%j'))}."
        )

        # (row label, report field) in display order.
        layout = [
            ('Tweets Collected Time Frame', 'duration'),
            ('Tweets Collected Last Day', 'last_day'),
            ('Tweets Collected', 'quantity'),
            ('Tweets Collection Status', 'tweets_status'),
            ('Tweets Collected Latest User', 'tweets_user'),
            ('Friends Collection Status', 'user_friends_status'),
            ('Year', 'year'),
        ]
        rows = [self.query[field] for _, field in layout]
        summary = pd.DataFrame(rows, index=[title for title, _ in layout])
        st.dataframe(summary)
    except Exception:
        pass
def index():
    """Rebuild the staged ``users`` collection from recent mentions.

    The collection is emptied, the tweets of the last seven days are loaded
    and preprocessed, and all mentioned screen names are counted. Muted names
    and names already in the community are discarded. The 30 most-mentioned
    remaining names are inserted as new staging documents with the mention
    count as ``activity``.

    The original sliced the first 30 names in first-seen order, so a heavily
    mentioned account could be dropped in favour of one mentioned once.
    ``Counter.most_common`` now picks the top 30.

    Returns:
        The string ``'done'``.
    """
    # The body is unused; parsing is kept so a non-JSON request still fails
    # the way it did before.
    dict(request.get_json())

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()
    mongo_app.db.users.delete_many({})

    tweets = tweet_objects.tweet_set()

    today = datetime.date.today()
    day = int(today.strftime('%j'))
    if day - 7 >= 0:
        tweets.get_tweets({
            '$and': [{
                'created_day': {
                    '$gt': day - 7
                }
            }, {
                'created_year': today.year
            }]
        })
    else:
        # The window reaches into the previous year: take everything from
        # this year plus the tail of last year.
        tweets.get_tweets({
            '$and': [{
                'created_day': {
                    '$gt': 0
                }
            }, {
                'created_year': today.year
            }]
        })
        last_year_days = int(
            datetime.datetime(today.year - 1, 12, 31).strftime('%j'))
        tweets.get_tweets({
            '$and': [{
                'created_day': {
                    '$gt': last_year_days + day - 7
                }
            }, {
                'created_year': today.year - 1
            }]
        })
    tweets.preprocess_tweets()

    # Names that must not be proposed again.
    excluded = set(get_mute_users())
    existing_users = user_objects.user_set()
    existing_users.get_users({})
    excluded.update(user.user['screen_name']
                    for user in existing_users.users)

    mention_counts = Counter()
    for tweet in tweets.tweets:
        mention_counts.update(tweet.tweet_mentions)
    for screen_name in excluded:
        mention_counts.pop(screen_name, None)

    now = datetime.datetime.now()
    for screen_name, activity in mention_counts.most_common(30):
        mongo_app.db.users.insert_one({
            'screen_name': screen_name,
            'activity': activity,
            'core_user': None,
            'status': None,
            'created_year': int(now.year),
            'created_day': int(now.strftime('%j')),
            'made_user': False
        })
    return ('done')
def make_added_users(self):
    """On button press, make today's manually added users and start their
    data collection.

    Steps: remember the screen names staged today with status ``Add``; ask the
    user-maker service to create them; then ask the data-collection service
    for their tweets (7 days) and, after a short pause, for the friends of the
    users added today.

    Compared with the original, a failed user-maker call is shown with
    ``st.error`` rather than as a success box, and the HTTP session is closed
    when done (two sessions were created and never closed).
    """
    if not st.button('Make Added Users'):
        return

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()
    today = datetime.date.today()

    payload = {
        "status": "Add",
        "created_year": int(today.year),
        "created_day": int(today.strftime('%j')),
        "made_user": False
    }
    # Collected before the service call; the filter includes made_user False.
    members = [
        doc['screen_name'] for doc in mongo_app.db.users.find(dict(payload))
    ]

    with requests.Session() as session:
        session.trust_env = False

        with st.spinner('Wait making users'):
            report = session.post("http://usermaker:5000/", json=payload)
        if report.status_code == 200:
            st.success('Made users 😉' + ' ' + str(report.text))
        else:
            st.error('Please try again 😢')

        with st.spinner('Collecting tweets...'):
            tweets_payload = {
                'days': 7,
                'user_payload': {
                    'user.screen_name': {
                        '$in': members
                    }
                },
                'source': 'add'
            }
            session.post("http://datacollection:5010/tweets",
                         json=tweets_payload)
            time.sleep(5)

            now = datetime.datetime.now()
            user_payload = {
                'type': 'new',
                'user_friends_payload': {
                    '$and': [{
                        'day_added': int(now.strftime('%j'))
                    }, {
                        'year_added': int(now.year)
                    }]
                },
                'source': 'add'
            }
            session.post("http://datacollection:5010/user_friends",
                         json=user_payload)
def tweets_celery_collector(self, query):
    """Download recent tweets of the selected users into the ``tweets``
    collection.

    For every user matched by ``query['user_payload']`` the timeline is paged
    backwards until a page ends at or before the window start. Tweets created
    strictly between midnight ``query['days']`` days ago and midnight two days
    ago are kept, annotated with ``created_day``, ``created_year`` and
    ``tweeted_hour``, and inserted unless a tweet with the same ``id`` is
    already stored. Progress is written to the ``data_collection`` report
    under ``query['source']``.

    Fixes over the original:

    * The next page is requested with ``max_id = last_id - 1``. ``max_id`` is
      inclusive, so the original kept re-fetching the last tweet forever when
      a user's oldest tweet was inside the window.
    * A failed timeline request skips the user. Previously the previous
      user's page stayed in ``tmpTweets`` (or the name was unbound for the
      first user) and the paging loop could spin on the same failing request.
    * De-duplication is applied to the JSON documents that get inserted; the
      original deleted from a different list.

    Args:
        query: Dict with ``days`` (int), ``source`` (report sub-key) and
            ``user_payload`` (Mongo filter for the ``community`` collection).

    Returns:
        ``'No Network'`` when a request cannot be sent (the report status is
        then left at ``'collecting'``), otherwise ``None``.
    """
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    today = datetime.date.today()
    source = query['source']

    api_keys = connections.twitter_api_keys()
    api_keys.get_existing_keys(no_of_keys=1)
    api = api_keys.keys[0].connect_to_twitter()

    # Collection window, both ends exclusive.
    todaydt = datetime.datetime(today.year, today.month, today.day, 0, 0, 0)
    startDate = todaydt - datetime.timedelta(days=query['days'])
    endDate = todaydt - datetime.timedelta(days=2)

    mongo_app.db.reports.update_one({'report': 'data_collection'}, {
        '$set': {
            f"last_day.{source}": int(today.strftime('%j')),
            f"tweets_status.{source}": 'collecting',
            f"duration.{source}": f"{startDate} - {endDate}"
        }
    })

    users = user_objects.user_set()
    users.get_users(query['user_payload'])

    tweets_by_id = {}  # tweet id -> tweet JSON; drops duplicates
    request_no = 0
    for user_no, user in enumerate(users.users, start=1):
        screen_name = user.user['screen_name']
        max_id = None
        while True:
            request_no += 1
            try:
                if max_id is None:
                    page = api.user_timeline(screen_name)
                else:
                    page = api.user_timeline(screen_name, max_id=max_id)
            except tweepy.TweepError as e:
                if str(getattr(e, 'reason',
                               ''))[0:22] == 'Failed to send request':
                    return ('No Network')
                print('Skipping' + screen_name)
                break
            print(request_no, screen_name, user_no)

            if not page:
                print('*=*=*=*= NO TWEETS BY *=*=*=*=*=', user, user_no)
                break

            for tweet in page:
                if startDate < tweet.created_at < endDate:
                    tweet._json['created_day'] = int(
                        tweet.created_at.strftime('%j'))
                    tweet._json['created_year'] = tweet.created_at.year
                    tweet._json['tweeted_hour'] = int(
                        tweet._json['created_at'][11:13])
                    tweets_by_id.setdefault(tweet._json['id'], tweet._json)

            if page[-1].created_at <= startDate:
                break
            print("Last Tweet @", page[-1].created_at,
                  " - fetching some more")
            # max_id is inclusive; step below the last tweet already seen.
            max_id = page[-1].id - 1

        mongo_app.db.reports.update_one(
            {'report': 'data_collection'},
            {'$set': {
                f"tweets_user.{source}": screen_name
            }})

    count = 0
    for tweet in tweets_by_id.values():
        tweet['downloaded_day_year'] = int(today.strftime('%j'))
        tweet['downloaded_year'] = int(today.year)
        tweet.pop('_id', None)
        if mongo_app.db.tweets.find_one({'id': tweet['id']}) is None:
            mongo_app.db.tweets.insert_one(tweet)
            count += 1

    mongo_app.db.reports.update_one({'report': 'data_collection'}, {
        '$set': {
            f"tweets_status.{source}": 'collected',
            f"quantity.{source}": count
        }
    })
def user_friends_celery_collector(self, query):
    """Download the friend ids of the selected users into ``community``.

    Users matched by ``query['user_friends_payload']`` are processed in order.
    For each, all friend ids are fetched and stored as ``friends_id``, and the
    position is recorded in the ``data_collection`` report so an interrupted
    run of the same ``type`` resumes one user before where it stopped. On a
    rate limit the task sleeps 60 seconds and switches to the next API key.

    Fixes over the original:

    * ``user_no`` defaults to 0, so a missing ``data_collection`` report no
      longer ends in an unbound-variable error.
    * Any non-network error skips the current user. Errors that carried a
      ``reason`` other than the network message used to leave ``user_no``
      unchanged, retrying the same user forever.

    Args:
        query: Dict with ``source`` (report sub-key), ``type`` (run type used
            for resuming) and ``user_friends_payload`` (Mongo filter for the
            ``community`` collection).

    Returns:
        ``'No Network'`` when a request cannot be sent (the report status is
        then left at ``'collecting'``), otherwise ``None``.
    """
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    today = datetime.date.today()
    source = query['source']

    api_keys = connections.twitter_api_keys()
    api_keys.get_existing_keys()

    mongo_app.db.reports.update_one({'report': 'data_collection'}, {
        '$set': {
            f"last_day.{source}": int(today.strftime('%j')),
            f"user_friends_status.{source}": 'collecting'
        }
    })

    users = user_objects.user_set()
    users.get_users(query['user_friends_payload'])

    # Resume an interrupted run of the same type; otherwise start at 0.
    user_no = 0
    report = mongo_app.db.reports.find_one({'report': 'data_collection'})
    if report is not None and report['friends_user']['type'] == query['type']:
        last_no = report['friends_user']['user_no']
        # The last index means the previous run completed.
        if last_no != len(users.users) - 1 and last_no != 0:
            user_no = last_no - 1

    key_no = 0
    api = api_keys.keys[key_no].connect_to_twitter_no_wait()
    while user_no < len(users.users):
        screen_name = users.users[user_no].user['screen_name']
        try:
            friends = [{
                'id': friend_id
            } for friend_id in tweepy.Cursor(
                api.friends_ids, screen_name=screen_name).items()]
            print(len(friends), screen_name, user_no)

            mongo_app.db.community.update_one(
                {'_id': users.user_ids[user_no]},
                {'$set': {
                    'friends_id': friends
                }})
            mongo_app.db.reports.update_one({'report': 'data_collection'}, {
                '$set': {
                    'friends_user': {
                        'screen_name': screen_name,
                        'user_no': user_no,
                        'type': query['type']
                    }
                }
            })
            user_no += 1
        except tweepy.RateLimitError:
            # Wait, then retry the same user with the next key.
            time.sleep(60)
            key_no = (key_no + 1) % (len(api_keys.keys))
            api = api_keys.keys[key_no].connect_to_twitter_no_wait()
            print(key_no)
        except Exception as e:
            if str(getattr(e, 'reason',
                           ''))[0:22] == 'Failed to send request':
                return ('No Network')
            print('skipping ' + screen_name)
            print(e)
            user_no += 1

    mongo_app.db.reports.update_one(
        {'report': 'data_collection'},
        {'$set': {
            f"user_friends_status.{source}": 'collected'
        }})
def finilize(self):
    """Render the "Finilize Users" section and apply the chosen actions.

    On button press: remember the screen names staged today with status
    ``Discover``; ask the user-maker service to create them; ask the
    mute-users service to process muted/blocked users; then ask the
    data-collection service for the new users' tweets (7 days) and, after a
    short pause, for the friends of the users added today.

    Compared with the original, failed service calls are shown with
    ``st.error`` rather than as a success box, and one HTTP session is used
    and closed (three were created and never closed).
    """
    st.write('---')
    st.write('## Finilize Users')
    st.write('Finilize the actions taken above.')
    if not st.button('Finilize Users'):
        return

    today = datetime.date.today()
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    payload = {
        "status": "Discover",
        "created_year": int(today.year),
        "created_day": int(today.strftime('%j')),
        "made_user": False
    }
    # Collected before the service call; the filter includes made_user False.
    members = [
        doc['screen_name'] for doc in mongo_app.db.users.find(dict(payload))
    ]

    with requests.Session() as session:
        session.trust_env = False

        with st.spinner('Making Users...'):
            report = session.post("http://usermaker:5000/", json=payload)
        if report.status_code == 200:
            st.success('Made users 😉' + ' ' + str(report.text))
        else:
            st.error('Please try again 😢')

        with st.spinner('Muting Users...'):
            report = session.post("http://muteusers:5040/", json={})
        if report.status_code == 200:
            st.success('Muted Users 😶')
        else:
            st.error('Please try again 😢')

        with st.spinner('Collecting tweets...'):
            tweets_payload = {
                'days': 7,
                'user_payload': {
                    'user.screen_name': {
                        '$in': members
                    }
                },
                'source': 'discover'
            }
            session.post("http://datacollection:5010/tweets",
                         json=tweets_payload)
            time.sleep(5)

            now = datetime.datetime.now()
            user_payload = {
                'type': 'new',
                'user_friends_payload': {
                    '$and': [{
                        'day_added': int(now.strftime('%j'))
                    }, {
                        'year_added': int(now.year)
                    }]
                },
                'source': 'discover'
            }
            session.post("http://datacollection:5010/user_friends",
                         json=user_payload)
def get_details(self):
    """Render the five weight sliders, save them on *Done*, and show the
    stored values.

    Slider values are kept on ``self`` (``tweet_weight``,
    ``tweet_mention_weight``, ``retweet_weight``, ``quote_weight``,
    ``reply_weight``). Pressing *Done* writes them to the ``preprocess``
    report. Whatever is stored is then listed with an explanation per weight;
    nothing is listed when no report exists yet.

    Compared with the original, the report is written with one upsert instead
    of find-then-insert/update, and the display no longer hides every error
    behind a bare ``except``: only a missing report or missing weight field
    is tolerated.
    """
    st.write(
        'Please **adjust** the below **values** to get **best Clusters**')
    self.tweet_weight = st.slider('Weight for plane Tweet',
                                  min_value=0.000,
                                  max_value=10000.000,
                                  value=0.010,
                                  step=0.001)
    self.tweet_mention_weight = st.slider('Weight for Tweet with mention',
                                          min_value=0.000,
                                          max_value=10000.000,
                                          value=0.100,
                                          step=0.001)
    self.retweet_weight = st.slider('Weight for retweet',
                                    min_value=0.000,
                                    max_value=10000.000,
                                    value=1.000,
                                    step=0.001)
    self.quote_weight = st.slider('Weight for Quote',
                                  min_value=0.000,
                                  max_value=10000.000,
                                  value=10.000,
                                  step=0.001)
    self.reply_weight = st.slider('Weight for reply',
                                  min_value=0.000,
                                  max_value=10000.000,
                                  value=100.000,
                                  step=0.001)
    st.write('During testing the default weights were giving good results,')
    st.write('If you do not get good Clusters please adjust above weights.')

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    if st.button('Done'):
        mongo_app.db.reports.update_one({'report': 'preprocess'}, {
            '$set': {
                'tweet_weight': self.tweet_weight,
                'tweet_mention_weight': self.tweet_mention_weight,
                'retweet_weight': self.retweet_weight,
                'quote_weight': self.quote_weight,
                'reply_weight': self.reply_weight
            }
        },
                                        upsert=True)

    report = mongo_app.db.reports.find_one({'report': 'preprocess'})
    if report is None:
        return

    explanations = [
        ('tweet_weight',
         ", this is the amount of bond strength gained by the followers of, if the user tweets a simple tweet."
         ),
        ('tweet_mention_weight',
         ", this is the amount of bond strength gained by a twitter user, if a user mentions the twitter user in his tweet."
         ),
        ('retweet_weight',
         ", this is the amount of bond strength gained by a twitter user, if a user retweets a tweet of a twitter user."
         ),
        ('quote_weight',
         ", this is the amount of bond strength gained by a twitter user, if a user quotes a tweet of a twitter user."
         ),
        ('reply_weight',
         ", this is the amount of bond strength gained by a twitter user, if a user replies to a tweet of a twitter user."
         ),
    ]
    try:
        for field, explanation in explanations:
            st.write('- ' + f"{report[field]}{explanation}")
    except KeyError:
        # A report saved without one of the weights: show what is available.
        pass
def discover(self, discover_query):
    """Tier, thread and flag the last seven days of tweets for one cluster.

    Steps, in order:

    1. Load the users of ``discover_query['cluster_number']`` (tier 1) and
       the users of every other cluster (tier 2).  Anyone else mentioned
       in a tweet is treated as tier 3.
    2. For each tier-1 user, fetch their tweets of the last seven days,
       assign every tweet the tier that dominates its mentions, and store
       the user's most-mentioned account per tier as ``bond_stats`` in the
       ``community`` collection.
    3. Turn each tweet into a vector over all seen screen names plus a
       day/hour component and group the vectors into threads with DBSCAN.
    4. Per tier, count how many threads start at each timestamp; threads
       starting at a timestamp whose count exceeds mean + 2 * std are
       written back to the ``tweets`` collection with ``discover=True``,
       all others with ``discover=False``.

    Args:
        discover_query: mapping that provides ``'cluster_number'``.

    Returns:
        The string ``'No tweets - <cluster_number>'`` when no tweets were
        found, otherwise ``None``.
    """
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()
    cluster_number = discover_query['cluster_number']

    def strongest_bond(counts):
        """Return the most-mentioned screen name of a tier and its count."""
        if not counts:
            return {'screen_name': '', 'interactions': 0}
        # Pick by mention count (ties: first seen), not by the
        # alphabetically greatest name.
        name = max(counts, key=counts.get)
        return {'screen_name': name, 'interactions': counts[name]}

    users_t1 = user_objects.user_set()
    users_t1.get_users({'cluster_number': cluster_number})
    tier1_names = {member.user['screen_name'] for member in users_t1.users}

    users_t2 = user_objects.user_set()
    users_t2.get_users({'cluster_number': {'$ne': cluster_number}})
    tier2_names = {member.user['screen_name'] for member in users_t2.users}

    # setting Dates
    today = datetime.date.today()
    seven_days_back = today - datetime.timedelta(days=7)
    first_day = int(seven_days_back.strftime('%j'))
    last_day = int(today.strftime('%j'))

    vector_template = []
    tweets_t1 = tweet_objects.tweet_set()
    start = 0
    for user in users_t1.users:
        user_id = user.user['id']
        after_start = {
            '$and': [{
                'created_day': {
                    '$gt': first_day
                }
            }, {
                'created_year': seven_days_back.year
            }]
        }
        up_to_today = {
            '$and': [{
                'created_day': {
                    '$lte': last_day
                }
            }, {
                'created_year': today.year
            }]
        }
        if seven_days_back.year == today.year:
            tweets_t1.get_tweets(
                {'$and': [{
                    'user.id': user_id
                }, after_start, up_to_today]})
        else:
            # The window straddles New Year: day-of-year restarts, so the
            # two halves have to be fetched separately.
            tweets_t1.get_tweets(
                {'$and': [{
                    'user.id': user_id
                }, after_start]})
            tweets_t1.get_tweets(
                {'$and': [{
                    'user.id': user_id
                }, up_to_today]})

        # tweets_t1.tweets accumulates across users; [start:end] is the
        # slice that belongs to the current user.
        end = len(tweets_t1.tweets)
        tweets_t1.preprocess_tweets_range(start, end)

        mentions_by_tier = {1: {}, 2: {}, 3: {}}
        vector_template.append(user.user['screen_name'])
        for tweet in tweets_t1.tweets[start:end]:
            hits = {1: 0, 2: 0, 3: 0}
            for member in tweet.tweet_mentions:
                vector_template.append(member)
                if member in tier1_names:
                    member_tier = 1
                elif member in tier2_names:
                    member_tier = 2
                else:
                    member_tier = 3
                hits[member_tier] += 1
                tier_counts = mentions_by_tier[member_tier]
                tier_counts[member] = tier_counts.get(member, 0) + 1
            # NOTE(review): one extra tier-1 hit per tweet, presumably for
            # the author, who belongs to this cluster - confirm.
            hits[1] += 1
            # max() returns the first maximum, so ties resolve towards the
            # lower tier number (1 before 2 before 3).
            tweet.tweet['tier'] = max((1, 2, 3), key=hits.get)

        query = {'user.screen_name': user.user['screen_name']}
        payload = {
            '$set': {
                'bond_stats': {
                    'tier1': strongest_bond(mentions_by_tier[1]),
                    'tier2': strongest_bond(mentions_by_tier[2]),
                    'tier3': strongest_bond(mentions_by_tier[3])
                }
            }
        }
        mongo_app.db.community.update(query, payload)
        start = end

    vector_template = list(set(vector_template))
    tweet_vectors = []
    for tweet in tweets_t1.tweets:
        day = float(tweet.tweet['created_day'] / 4)
        hour = float(0.25 * tweet.tweet['tweeted_hour'] / 24)
        vector = [
            1 + day + hour if
            (member in tweet.tweet_mentions
             or member == tweet.tweet_user_screen_name) else 0
            for member in vector_template
        ]
        vector.append(day)
        vector.append(hour)
        tweet_vectors.append(vector)
    if len(tweet_vectors) == 0:
        return (f'No tweets - {cluster_number}')

    # clustering
    df = pd.DataFrame(tweet_vectors)
    clustring = DBSCAN(eps=0.25, min_samples=1).fit(df)
    for tweet, label in zip(tweets_t1.tweets, clustring.labels_):
        tweet.tweet['thread_number'] = label

    tweet_df = pd.DataFrame([tweet.tweet for tweet in tweets_t1.tweets])
    columns_to_remove = [
        '_id', 'id_str', 'truncated', 'entities', 'source',
        'in_reply_to_status_id', 'in_reply_to_status_id_str',
        'in_reply_to_user_id', 'in_reply_to_user_id_str',
        'in_reply_to_screen_name', 'geo', 'coordinates', 'place',
        'contributors', 'is_quote_status', 'favorited', 'retweeted', 'lang',
        'downloaded_day_year', 'downloaded_year', 'extended_entities',
        'possibly_sensitive', 'quoted_status_id', 'quoted_status_id_str',
        'quoted_status', 'retweeted_status'
    ]
    # Not every tweet document carries every field; ignore absent columns.
    tweet_df = tweet_df.drop(columns=columns_to_remove, errors='ignore')
    tweet_df['screen_name'] = tweet_df['user'].apply(
        lambda x: x['screen_name'])
    tweet_df['friends_count'] = tweet_df['user'].apply(
        lambda x: x['friends_count'])
    tweet_df['followers_count'] = tweet_df['user'].apply(
        lambda x: x['followers_count'])
    tweet_df.drop('user', axis=1, inplace=True)
    tweet_df['tweeted_at'] = tweet_df.apply(
        lambda row: datetime.datetime(row.created_year, 1, 1, row.
                                      tweeted_hour) + datetime.timedelta(
                                          row.created_day - 1),
        axis=1)
    tweet_df['tweet_url'] = tweet_df.apply(
        lambda row:
        f'https://twitter.com/{row.screen_name}/status/{row.id}',
        axis=1)

    for tier in (1, 2, 3):
        temp_df = tweet_df[tweet_df['tier'] == tier]
        if len(temp_df) == 0:
            print(
                f'{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number}'
            )
            # Nothing to thread or write back for an empty tier.
            continue
        print(
            f'{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*'
        )

        # Earliest tweet time of every thread, then the number of threads
        # that start at each distinct time.
        thread_starts = temp_df.groupby('thread_number')['tweeted_at'].min()
        threads_per_start = thread_starts.value_counts()
        threshold = threads_per_start.mean() + 2 * threads_per_start.std()
        burst_starts = set(
            threads_per_start[threads_per_start > threshold].index)

        for thread_number, time_df in temp_df.groupby('thread_number'):
            started_at = time_df['tweeted_at'].min()
            is_burst = started_at in burst_starts
            for tweet_id, tweet_tier in zip(time_df['id'], time_df['tier']):
                # Cast to built-in int so the values are plain Python
                # numbers rather than numpy scalars when sent to Mongo.
                query = {'id': int(tweet_id)}
                payload = {
                    '$set': {
                        'tier': int(tweet_tier),
                        'thread_number': int(thread_number),
                        'discover': is_burst,
                        'cluster_number': cluster_number
                    }
                }
                mongo_app.db.tweets.update(query, payload)
def main():
    """Run the data-collection scheduler loop forever.

    Every 30 minutes the ``data_collection`` report is read from Mongo:

    * If the scheduler has not run today and the scheduled hour has
      passed, the ``/tweets`` and ``/user_friends`` endpoints of the data
      collection service are triggered and tweets older than 30 days are
      purged.
    * Otherwise, if a collection started by this process is still marked
      ``'collecting'`` and is on the same user as at the previous check,
      the corresponding request is posted again.

    The function never returns.
    """
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    # State carried between iterations.  It stays None until this process
    # has triggered a collection, which makes the retry branch an explicit
    # no-op instead of relying on a swallowed NameError.
    session = None
    tweets_payload = None
    user_payload = None
    old_user_tweets = None
    old_user_friends = None
    iteration = 0

    while True:
        time.sleep(60 * 30)
        now = datetime.datetime.now()
        hour = int(now.hour)
        day = int(now.strftime('%j'))

        reports = list(mongo_app.db.reports.find({'report':
                                                  'data_collection'}))
        if not reports:
            continue
        report = reports[0]

        if ((report['last_day']['scheduler'] != day)
                and (report['scheduled_hour'] < hour)):
            session = requests.Session()
            session.trust_env = False

            # Days since the last scheduled run (plus a margin of two),
            # capped at seven.
            last_date = datetime.datetime(
                report['year']['scheduler'], 1,
                1) + datetime.timedelta(report['last_day']['scheduler'] - 1)
            elapsed = (datetime.datetime.now() - last_date).days + 2
            tweets_payload = {
                'days': min(elapsed, 7),
                'user_payload': {},
                'source': 'scheduler'
            }
            session.post("http://datacollection:5010/tweets",
                         json=tweets_payload)
            time.sleep(5)

            user_payload = {
                'type': 'old',
                'user_friends_payload': {},
                'source': 'scheduler'
            }
            session.post("http://datacollection:5010/user_friends",
                         json=user_payload)

            # Purge tweets older than 30 days.
            month_back = datetime.datetime.now() - datetime.timedelta(
                days=30)
            # created_day is compared as an integer elsewhere (see `day`
            # above); a '%j' string would never match integer values in a
            # Mongo $lt comparison.
            month_back_day = int(month_back.strftime('%j'))
            month_back_year = month_back.year
            mongo_app.db.tweets.remove(
                {'created_year': month_back_year - 1})
            mongo_app.db.tweets.remove({
                '$and': [{
                    'created_year': month_back_year
                }, {
                    'created_day': {
                        '$lt': month_back_day
                    }
                }]
            })
        elif iteration != 0 and session is not None:
            try:
                if ((report['last_day']['scheduler'] == day) and
                    (report['tweets_status']['scheduler'] == 'collecting')
                        and (report['tweets_user']['scheduler']
                             == old_user_tweets)):
                    session.post("http://datacollection:5010/tweets",
                                 json=tweets_payload)
                if ((report['last_day']['scheduler'] == day)
                        and (report['user_friends_status']['scheduler']
                             == 'collecting')
                        and (report['friends_user']['screen_name']
                             == old_user_friends)):
                    session.post("http://datacollection:5010/user_friends",
                                 json=user_payload)
            except Exception as exc:
                # Best effort: a failed retry must not stop the scheduler,
                # but it should at least be visible.
                print(f'scheduler retry failed: {exc!r}')

        # NOTE(review): tweets_user is read via 'scheduler' but
        # friends_user via 'screen_name' - confirm this asymmetry is
        # intended.
        old_user_tweets = report['tweets_user']['scheduler']
        old_user_friends = report['friends_user']['screen_name']
        iteration += 1