Esempio n. 1
0
    def take_actions(self):
        """Let the operator choose an action for one of the listed users.

        Shows a multiselect over ``self.users`` and works on the last entry
        of the selection. The stored ``activity`` of that user is displayed,
        then two radio groups ask for the action and the core-user flag.
        Pressing "Done" writes both values to the user's record in the
        ``users`` collection and echoes the stored record back.

        Any ``IndexError`` raised inside the flow (e.g. nothing selected yet)
        results in a hint to start selecting users.
        """
        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        selection = st.multiselect('Select Users to take action',
                                   list(self.users))

        try:
            # Raises IndexError while the selection is still empty.
            user = selection[-1]
            selector = {'screen_name': user}

            url = f'[{user}](https://twitter.com/{user})'
            st.write(
                f'@{url} is the selected user, Please fill the details below')

            activity = [
                record['activity']
                for record in mongo_app.db.users.find(selector)
            ]
            st.write(
                f'The number of interactions with @{url} is : {activity[0]!s}')

            status = st.radio(f'What action do you wan to take on @{user}',
                              [None, 'Discover', 'Mute', 'Block'],
                              index=0)
            core_user = st.radio(f'Do you want @{user} to be Core user ?',
                                 [None, 'Yes', 'No'],
                                 index=0)

            if st.button('Done'):
                if status is None or core_user is None:
                    st.write('Enter above details properly')
                    st.stop()
                else:
                    with st.spinner('Working'):
                        mongo_app.db.users.update(
                            selector,
                            {'$set': {
                                'status': status,
                                'core_user': core_user
                            }})

                        # Read the record back so the confirmation shows
                        # what is actually stored.
                        stored = list(mongo_app.db.users.find(selector))[0]
                        st.success(', '.join([
                            stored['screen_name'], stored['status'],
                            stored['core_user']
                        ]))

        except IndexError:
            st.write(
                'Start selecting users, After pressing Discover users button')
    def make_new_user(self, user_json, api):
        """Fill this user from a Twitter lookup and flag its source record.

        Args:
            user_json: record from the ``users`` collection; the keys read
                here are ``screen_name``, ``core_user``, ``created_day``,
                ``created_year`` and ``_id``.
            api: client object exposing ``get_user(id=...)`` whose result
                carries a ``_json`` mapping.

        Side effects:
            Sets ``made_user`` to True on the source record in ``users``.
        """
        profile = api.get_user(id=user_json['screen_name'])._json

        # Copy the identifying fields from the looked-up profile.
        for field in ('screen_name', 'id', 'id_str', 'name'):
            self.user[field] = profile[field]

        self.core_user = user_json['core_user'] == 'Yes'
        self.day_added = user_json['created_day']
        self.year_added = user_json['created_year']
        self.bond_stats = None

        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()
        mongo_app.db.users.update({'_id': user_json['_id']},
                                  {'$set': {'made_user': True}})
Esempio n. 3
0
    def delete_user(self):
        """Render the "Delete User" section and delete the named user.

        When the "Delete" button is pressed, the entered screen name is
        looked up in the ``community`` collection. If it is absent a notice
        is shown and the script is stopped; otherwise the matching documents
        are removed from both ``community`` and ``tweets``.
        """
        for text in ('---', '## Delete User',
                     'To delete a user, Please enter the details below'):
            st.write(text)

        screen_name = str(st.text_input('Enter screen_name of user'))

        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        if not st.button('Delete'):
            return

        selector = {'user.screen_name': screen_name}
        matches = mongo_app.db.community.find(selector).count()

        if matches == 0:
            st.write(f'@{screen_name} is not their 😏')
            st.stop()
        else:
            mongo_app.db.community.remove(selector)
            mongo_app.db.tweets.remove(selector)
            st.write(f'@{screen_name} is deleted')
Esempio n. 4
0
def index():
    """Dispatch one ``tasks.discover`` job per cluster and wait for them.

    The number of clusters is read from the first ``cluster_community``
    report; when no such report exists nothing is dispatched. Jobs are sent
    with a random 0-3 second pause between them, after which the handler
    blocks until every job has finished.

    Returns:
        The string ``'done'``.
    """
    # Parsed but otherwise unused; dict() raises if the body cannot be
    # converted to a dict.
    dict(request.get_json())

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    # Default to zero so a missing report does not leave the name unbound.
    no_clusters = 0
    for report in mongo_app.db.reports.find({'report': 'cluster_community'}):
        no_clusters = report['no_clusters']
        break

    results = []
    for cluster_number in range(no_clusters):
        result = tasks.discover.delay({'cluster_number': cluster_number})

        t = random.uniform(0, 3)
        print(t)  #only for debug purpose
        time.sleep(t)

        results.append(result)

    # Block until every task is in a terminal state (success, failure or
    # revoked) so a failed task cannot stall the handler. The short sleep
    # keeps the poll from spinning a CPU core; with no tasks all() is True
    # and the loop is skipped.
    while not all(result.ready() for result in results):
        time.sleep(0.5)

    return 'done'
def index():
    """Cluster the community graph and store the cluster of every user.

    Builds a directed graph with one vertex per user (sized by
    ``activity``) and one weighted edge per non-zero ``bond``, runs
    ``community_infomap`` on it, writes ``cluster_number`` onto each user's
    ``community`` document, and finally creates or refreshes the
    ``cluster_community`` report with the user and cluster counts.

    Returns:
        The string ``'done'``.
    """
    query = dict(request.get_json())

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    all_users = user_objects.user_set()
    all_users.get_users({})
    members = all_users.users

    graph = ig.Graph(directed=True)

    # Vertices first, so that edges can refer to them by name.
    for member in members:
        graph.add_vertex(name=member.user['screen_name'],
                         size=member.activity)

    for member in members:
        for edge in member.edges:
            if edge['bond'] == 0:
                continue
            graph.add_edge(member.user['screen_name'],
                           edge['screen_name'],
                           weight=edge['bond'])

    clustering = graph.community_infomap(edge_weights=graph.es['weight'],
                                         vertex_weights=graph.vs['size'],
                                         trials=1000)

    subgraphs = list(clustering.subgraphs())
    for cluster_number, subgraph in enumerate(subgraphs):
        for vertex in subgraph.vs:
            mongo_app.db.community.update(
                {'user.screen_name': vertex['name']},
                {'$set': {'cluster_number': cluster_number}})

    summary = {'no_users': len(members), 'no_clusters': len(subgraphs)}
    report_selector = {'report': 'cluster_community'}

    # Update the existing report if there is one, otherwise create it.
    if mongo_app.db.reports.find(report_selector).count() > 0:
        mongo_app.db.reports.update(report_selector, {'$set': summary})
    else:
        mongo_app.db.reports.insert({'report': 'cluster_community', **summary})

    return ('done')
Esempio n. 6
0
    def get_tweets(self, query):
        """Append a ``Tweet`` to ``self.tweets`` for every matching document.

        Args:
            query: filter passed unchanged to ``find`` on the ``tweets``
                collection.
        """
        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        documents = mongo_app.db.tweets.find(query)
        self.tweets.extend(Tweet(document) for document in documents)
    def make_new_users(self, query):
        """Build ``User`` objects for the pending records matching ``query``.

        Args:
            query: mapping with the keys ``status``, ``created_year``,
                ``created_day`` and ``made_user`` used to select records
                from the ``users`` collection.

        Behaviour:
            Records whose year and day both differ from the query are
            removed first. When nothing matches the query the method
            returns without contacting Twitter. Otherwise the first match
            is looked up once as a probe: a network failure or an unknown
            user (API code 50) is reported through Streamlit and the script
            is stopped; any other error is re-raised. Each match is then
            converted with ``User.make_new_user`` and appended to
            ``self.users``.
        """
        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        # NOTE(review): with `$and` a record is removed only when BOTH its
        # year and its day differ, so e.g. yesterday's records of this year
        # survive. `$or` may have been intended - confirm before changing,
        # since that would widen what gets deleted.
        mongo_app.db.users.remove({
            '$and': [{
                'created_year': {
                    '$ne': query['created_year']
                }
            }, {
                'created_day': {
                    '$ne': query['created_day']
                }
            }]
        })

        users = list(
            mongo_app.db.users.find({
                '$and': [{
                    'status': query['status']
                }, {
                    'created_year': query['created_year']
                }, {
                    'created_day': query['created_day']
                }, {
                    'made_user': query['made_user']
                }]
            }))

        # Nothing to create; also avoids indexing into an empty list below.
        if not users:
            return

        keys = connections.twitter_api_keys()
        keys.get_existing_keys(no_of_keys=1)

        api = keys.keys[0].connect_to_twitter()

        try:
            # Probe lookup; only its success or failure matters.
            api.get_user(id=users[0]['screen_name'])
        except Exception as e:
            # Not every exception carries `reason` / `api_code`, so read
            # them defensively instead of raising AttributeError here.
            reason = getattr(e, 'reason', None)

            if isinstance(reason,
                          str) and reason.startswith('Failed to send request'):
                st.write('No Internet 😴')
                st.stop()
            elif getattr(e, 'api_code', None) == 50:
                st.write('User not found, please check the name properly 🥱')
                st.stop()
            else:
                # Unrecognised failure: surface it rather than hide it.
                raise

        for member in users:
            new_user = User()
            new_user.make_new_user(member, api)

            self.users.append(new_user)
Esempio n. 8
0
    def delete(self):
        """Render the "Delete API APP keys" section and remove matching keys.

        The removal from the ``auth`` collection is issued on every call,
        using whatever the text input currently holds as ``app_name``.
        """
        for text in ('---', '## Delete API APP keys'):
            st.write(text)

        app_name = str(st.text_input('Enter API APP name'))

        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        selector = {'app_name': app_name}
        mongo_app.db.auth.remove(selector)
    def get_users(self, query):
        """Load the community members matching ``query`` into this set.

        For every matching ``community`` document a ``User`` is built via
        ``make_user`` and appended to ``self.users``; the document's ``_id``
        is appended to ``self.user_ids`` at the same position.

        Args:
            query: filter passed unchanged to ``find``.
        """
        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        for document in mongo_app.db.community.find(query):
            member = User()
            member.make_user(document)

            self.users.append(member)
            self.user_ids.append(document['_id'])
    def push_to_community(self):
        """Insert every user of this set that is not yet in ``community``.

        A user counts as already stored when some ``community`` document has
        an identical ``user`` sub-document.
        """
        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        for member in self.users:
            document = member.make_user_json()

            stored = mongo_app.db.community.find({
                'user': document['user']
            }).count()
            if stored > 0:
                continue

            mongo_app.db.community.insert(document)
def get_mute_users():
    """Return the ``screen_name`` of every document in the ``mute`` collection.

    Reading is best-effort: if it fails part-way, the names gathered so far
    are returned and the error is printed instead of being raised.

    Returns:
        list: screen names, possibly empty.
    """
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    mute_users = []
    try:
        for user in mongo_app.db.mute.find({}):
            mute_users.append(user['screen_name'])
    except Exception as error:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate, and leave a trace of what went wrong.
        print('Could not read all mute users:', error)

    return mute_users
Esempio n. 12
0
    def is_auth_added(self):
        """Validate and store the entered Twitter API key on button press.

        When "Add API Keys" is pressed the entry is checked with
        ``self.is_correct()`` and ``self.is_repeated()``. A valid, new key
        is tested with one ``get_user`` call; a rate-limit error, a network
        failure or any other error is reported through Streamlit and the
        script is stopped. On success the key document is inserted into the
        ``auth`` collection.
        """
        if (st.button('Add API Keys')):

            correct = self.is_correct()
            repeated = self.is_repeated()

            if (repeated or not correct):
                st.write(
                    'Please enter details properly, probabably you are missing to fill something or are trying to add same key again, if nothing solves try deleting the existing api app and add new one'
                )
                st.stop()
            else:

                key_json = self.key.make_json()

                mongo_app = connections.mongo()
                mongo_app.connect_to_mongo()

                #testing
                api = self.key.connect_to_twitter_no_wait()

                try:
                    # Only success or failure of the lookup matters.
                    api.get_user(id='Twitter')

                except tweepy.RateLimitError:

                    st.write(
                        'Try after 15 mins as we have exceded twitter rate limit'
                    )
                    st.stop()

                except Exception as e:

                    # Not every exception has a `reason`; read it
                    # defensively so the generic branch below is reached
                    # instead of an AttributeError.
                    reason = getattr(e, 'reason', None)

                    if isinstance(reason, str) and reason.startswith(
                            'Failed to send request'):
                        st.write('No Internet 🥱')
                        st.stop()

                    else:
                        st.write(
                            'There may be some mistake in the keys entered 😭')
                        st.stop()

                mongo_app.db.auth.insert(key_json)
                st.write('**To Delete key, Use Mongo Compass.**')
                msg = 'The Twitter API Key is Added, Now you can collect more Data efficiently'
                st.success(msg)
def index():
    """Age the mute list and move newly muted/blocked users onto it.

    Steps:
        1. Decrement ``activity`` of every ``mute`` document with status
           ``Mute`` and remove those that reached 0 (best-effort; a failure
           is printed and the handler continues).
        2. Select ``users`` records with ``made_user`` False and status
           ``Mute`` or ``Block``.
        3. For each of them set ``made_user`` to True on the ``users``
           record and insert a copy (without ``_id``, with ``activity`` 7)
           into ``mute``.

    Returns:
        The string ``'done'``.
    """
    # Parsed but otherwise unused; dict() raises if the body cannot be
    # converted to a dict.
    dict(request.get_json())

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    try:

        mongo_app.db.mute.update_many({'status': 'Mute'},
                                      {'$inc': {
                                          'activity': -1
                                      }})

        mongo_app.db.mute.remove(
            {'$and': [{
                'status': 'Mute'
            }, {
                'activity': 0
            }]})

    except Exception as error:
        # Still best-effort, but no longer swallows KeyboardInterrupt or
        # SystemExit, and leaves a trace of the failure.
        print('Mute aging step failed:', error)

    query = {
        '$and': [{
            'made_user': False
        }, {
            '$or': [{
                'status': 'Mute'
            }, {
                'status': 'Block'
            }]
        }]
    }

    # Materialise first so the cursor is not open while documents change.
    mute_users = list(mongo_app.db.users.find(query))

    for user in mute_users:

        user['activity'] = 7
        # `$set` is required: a plain document as the update argument
        # replaces the whole record, dropping every other field.
        mongo_app.db.users.update({'_id': user['_id']},
                                  {'$set': {
                                      'made_user': True
                                  }})
        user.pop('_id')
        mongo_app.db.mute.insert(user)

    return ('done')
Esempio n. 14
0
    def get_users(self):
        """Collect the screen names of today's not-yet-made users.

        Selects ``users`` records whose ``created_day`` / ``created_year``
        equal the current day-of-year and year and whose ``made_user`` is
        False, and appends each ``screen_name`` to ``self.users``.
        """
        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        created_today = [
            {'created_day': int(datetime.datetime.now().strftime('%j'))},
            {'created_year': int(datetime.datetime.now().year)},
            {'made_user': False},
        ]

        matches = mongo_app.db.users.find({'$and': created_today})
        self.users.extend(member['screen_name'] for member in matches)
def index():
    """Dispatch one ``tasks.process`` job per community user and wait.

    Every job receives the user's screen name and the five weights read
    from the first ``preprocess`` report. Jobs are sent with a random
    0-2 second pause between them, after which the handler blocks until
    every job has finished.

    Returns:
        The string ``'done'``.

    Raises:
        IndexError: if there are users to process but no ``preprocess``
            report exists.
    """
    # Parsed but otherwise unused; dict() raises if the body cannot be
    # converted to a dict.
    dict(request.get_json())

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    reports = list(mongo_app.db.reports.find({'report': 'preprocess'}))

    all_users = user_objects.user_set()
    all_users.get_users({})

    weight_keys = ('tweet_weight', 'tweet_mention_weight', 'retweet_weight',
                   'quote_weight', 'reply_weight')

    results = []
    for user in all_users.users:

        task_query = {
            'user': user.user['screen_name'],
            'weights': {key: reports[0][key] for key in weight_keys}
        }

        result = tasks.process.delay(task_query)
        t = random.uniform(0, 2)
        print(t)  #only for debug purpose
        time.sleep(t)
        results.append(result)

    # Block until every task is in a terminal state (success, failure or
    # revoked) so a failed task cannot stall the handler. The short sleep
    # keeps the poll from spinning a CPU core; with no tasks all() is True
    # and the loop is skipped.
    while not all(result.ready() for result in results):
        time.sleep(0.5)

    return 'done'
Esempio n. 16
0
    def show_preview(self):
        """Render the "Show preview" section with a table of ``users``.

        When the button is pressed, every document of the ``users``
        collection is loaded and shown as a dataframe restricted to a fixed
        set of columns.
        """
        for text in (
                '---', '## Show preview',
                'See what all actions have been taken on the above people.'):
            st.write(text)

        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        if not st.button('Show preview'):
            return

        records = list(mongo_app.db.users.find({}))
        columns = [
            'screen_name', 'core_user', 'status', 'activity', 'created_year',
            'created_day', 'made_user'
        ]
        st.dataframe(pd.DataFrame(records, columns=columns))
Esempio n. 17
0
    def show_report(self):
        """Show user and cluster counts from the ``cluster_community`` report.

        Does nothing when no such report exists. Otherwise the first report
        found is displayed and its ``no_clusters`` value is stored on
        ``self.no_clusters``.
        """
        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        reports = list(
            mongo_app.db.reports.find({'report': 'cluster_community'}))
        if not reports:
            return

        report = reports[0]

        st.write(
            f"The present no of Users in our community are : {report['no_users']}"
        )
        st.write(
            f"The number of **Clusters** detected are : {report['no_clusters']}"
        )

        self.no_clusters = report['no_clusters']
Esempio n. 18
0
    def is_user_added(self):
        """Verify the entered user on Twitter and store it on button press.

        When "Done" is pressed and ``screen_name``, ``core_user`` and
        ``status`` are all filled in, the screen name is looked up through
        the Twitter client. A network failure or an unknown user (API code
        50) is reported through Streamlit and the script is stopped; any
        other lookup error is re-raised so that an unverified user is not
        stored. On success the document from ``self.make_json()`` is
        inserted into the ``users`` collection.
        """
        if (st.button('Done')):

            if (self.screen_name is None or self.screen_name == ''
                    or self.core_user is None or self.status is None):
                st.write('Please enter details properly 🎅👵')
                st.stop()
            else:

                try:
                    keys = connections.twitter_api_keys()
                    keys.get_existing_keys(no_of_keys=1)

                    api = keys.keys[0].connect_to_twitter()

                    # Only success or failure of the lookup matters.
                    api.get_user(id=self.screen_name)
                except Exception as e:

                    # Not every exception carries `reason` / `api_code`, so
                    # read them defensively instead of raising
                    # AttributeError here.
                    reason = getattr(e, 'reason', None)

                    if isinstance(reason, str) and reason.startswith(
                            'Failed to send request'):
                        st.write('No Internet 😴')
                        st.stop()

                    elif getattr(e, 'api_code', None) == 50:
                        st.write(
                            'User not found, please check the name properly 🥱')
                        st.stop()

                    else:
                        # Unrecognised failure: surface it rather than fall
                        # through to the insert below.
                        raise

                user = self.make_json()

                mongo_app = connections.mongo()
                mongo_app.connect_to_mongo()

                mongo_app.db.users.insert(user)

                msg = f'@{self.screen_name} is added, Is Core user : {self.core_user}, @{self.screen_name} will be {self.status}, date is {self.created_day}, Add the users and press on Make users button to add users sofar'
                st.success(msg)
Esempio n. 19
0
def process(self, query):
    """Recompute one user's activity and bond strength to every other user.

    Args:
        query: mapping with ``user`` (screen name of the user to process)
            and ``weights`` (mapping holding ``tweet_weight``,
            ``tweet_mention_weight``, ``retweet_weight``, ``quote_weight``
            and ``reply_weight``).

    Behaviour:
        An edge with bond 0 is created towards every other community user,
        recording whether that user lists this one in ``friends_id``. The
        user's tweets of the last seven days are loaded; for each tweet
        every follower edge gains the base weight and every edge whose
        screen name is mentioned gains the weight of the tweet's type. The
        resulting ``activity`` (number of tweets) and ``edges`` are written
        to the user's ``community`` document.
    """
    weights = query['weights']

    selected = user_objects.user_set()
    selected.get_users({'user.screen_name': query['user']})
    user = selected.users[0]

    all_users = user_objects.user_set()
    all_users.get_users({})

    # Start every edge at bond 0; bonds are accumulated further down.
    user.edges = []
    for other in all_users.users:
        if user.user['screen_name'] == other.user['screen_name']:
            continue

        user.edges.append({
            'id': other.user['id'],
            'screen_name': other.user['screen_name'],
            'is_follower': any(friend['id'] == user.user['id']
                               for friend in other.friends_id),
            'bond': 0,
        })

    tweets = tweet_objects.tweet_set()

    #today and seven days back
    today = datetime.date.today()
    seven_days_back = today - datetime.timedelta(days=7)

    def day_clause(operator, day):
        # Day-of-year comparison pinned to the year of `day`.
        return {
            '$and': [{
                'created_day': {
                    operator: int(day.strftime('%j'))
                }
            }, {
                'created_year': day.year
            }]
        }

    def tweets_of_user(*clauses):
        return {'$and': [{'user.id': user.user['id']}, *clauses]}

    if seven_days_back.year == today.year:
        # The whole window lies in one year: a single range query.
        tweets.get_tweets(
            tweets_of_user(day_clause('$gt', seven_days_back),
                           day_clause('$lte', today)))
    else:
        # The window spans New Year: query each year's part separately.
        tweets.get_tweets(tweets_of_user(day_clause('$gt', seven_days_back)))
        tweets.get_tweets(tweets_of_user(day_clause('$lte', today)))

    tweets.preprocess_tweets()

    user.activity = len(tweets.tweets)

    # Keys other than 'base' are looked up by `tweet.tweet_type` below.
    now_weights = {
        'base': weights['tweet_weight'],
        'tweet': weights['tweet_mention_weight'],
        'retweet': weights['retweet_weight'],
        'quote': weights['quote_weight'],
        'reply': weights['reply_weight'],
    }

    for tweet in tweets.tweets:
        for edge in user.edges:
            if edge['is_follower']:
                edge['bond'] += now_weights['base']
            if edge['screen_name'] in tweet.tweet_mentions:
                edge['bond'] += now_weights[tweet.tweet_type]

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()
    mongo_app.db.community.update(
        {'user.screen_name': user.user['screen_name']},
        {'$set': {
            'activity': user.activity,
            'edges': user.edges
        }})
Esempio n. 20
0
    def status(self):
        """Configure the daily schedule hour and show collection status.

        A slider selects the hour. Pressing "Done" either creates the
        ``data_collection`` report (when none exists) with that hour and
        default bookkeeping fields dated seven days back, or updates
        ``scheduled_hour`` on the existing one.

        Afterwards the report is read into ``self.query`` and its contents,
        together with the count of tweets downloaded today, are displayed.
        Any exception raised while displaying is ignored.
        """
        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        label = 'When do you want to schedule this task every day ?(Please mention in UTC timezone)'
        hour = st.slider(label=label,
                         min_value=0,
                         max_value=23,
                         value=8,
                         step=1)

        if st.button('Done'):

            existing = list(
                mongo_app.db.reports.find({'report': 'data_collection'}))

            if existing:
                mongo_app.db.reports.update({'report': 'data_collection'},
                                            {'$set': {
                                                'scheduled_hour': hour
                                            }})
            else:
                today = datetime.date.today()
                midnight = datetime.datetime(today.year, today.month,
                                             today.day, 0, 0, 0)
                week_ago = midnight - datetime.timedelta(days=7)

                def per_source(value):
                    # One entry per collection source, all with `value`.
                    return dict.fromkeys(('scheduler', 'add', 'discover'),
                                         value)

                mongo_app.db.reports.insert({
                    'report': 'data_collection',
                    'scheduled_hour': hour,
                    'last_day': per_source(int(week_ago.strftime('%j'))),
                    'year': per_source(int(week_ago.year)),
                    'tweets_status': per_source('collected'),
                    'user_friends_status': per_source('collected'),
                    'tweets_user': per_source(''),
                    'friends_user': {
                        'screen_name': '',
                        'user_no': 0,
                        'type': 'old'
                    },
                    'duration': per_source(''),
                    'quantity': per_source(0)
                })

            st.success(f'Changed to {hour} hour')

        try:
            reports = list(
                mongo_app.db.reports.find({'report': 'data_collection'}))

            # Raises IndexError when no report exists; ignored below.
            self.query = reports[0]

            downloaded_today = {
                '$and': [{
                    'downloaded_day_year':
                    int(datetime.datetime.today().strftime('%j'))
                }, {
                    'downloaded_year': datetime.datetime.today().year
                }]
            }
            count = mongo_app.db.tweets.find(downloaded_today).count()

            st.write(f"Scheduled hour is :{self.query['scheduled_hour']}.")
            st.write(
                f"Live count of number of **tweets downloaded** : {count}.")
            st.write(
                f"The User whose friends are being collected '{self.query['friends_user']['screen_name']}'."
            )
            st.write(
                f"Today's day of year is : {int(datetime.datetime.today().strftime('%j'))}."
            )

            # (row label, report key) pairs, in display order.
            rows = [
                ('Tweets Collected Time Frame', 'duration'),
                ('Tweets Collected Last Day', 'last_day'),
                ('Tweets Collected', 'quantity'),
                ('Tweets Collection Status', 'tweets_status'),
                ('Tweets Collected Latest User', 'tweets_user'),
                ('Friends Collection Status', 'user_friends_status'),
                ('Year', 'year'),
            ]

            df = pd.DataFrame([self.query[key] for _, key in rows],
                              index=[title for title, _ in rows])

            st.dataframe(df)

        except Exception:
            pass
def index():
    """Rebuild the ``users`` collection with newly discovered accounts.

    Steps:
        1. Empty the ``users`` collection.
        2. Load the tweets of the last seven days (two queries when that
           window reaches into the previous year) and preprocess them.
        3. Count how often each account is mentioned, leaving out muted
           accounts and accounts already in the community.
        4. Insert the 30 most mentioned accounts as new ``users`` records
           dated today, with no status or core-user decision yet.

    Returns:
        The string ``'done'``.
    """
    # Parsed but otherwise unused; dict() raises if the body cannot be
    # converted to a dict.
    dict(request.get_json())

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    mongo_app.db.users.remove({})

    tweets = tweet_objects.tweet_set()

    # Take the date once so day and year always belong to the same day.
    today = datetime.date.today()
    day = int(today.strftime('%j'))

    def created_after(min_day, year):
        # Tweets of `year` whose day-of-year is greater than `min_day`.
        return {
            '$and': [{
                'created_day': {
                    '$gt': min_day
                }
            }, {
                'created_year': year
            }]
        }

    if day - 7 >= 0:
        tweets.get_tweets(created_after(day - 7, today.year))
    else:
        # The window reaches into last year: everything of this year so
        # far, plus the tail of the previous year.
        tweets.get_tweets(created_after(0, today.year))

        last_year_days = int(
            datetime.datetime(today.year - 1, 12, 31).strftime('%j'))
        tweets.get_tweets(
            created_after(last_year_days + day - 7, today.year - 1))

    tweets.preprocess_tweets()

    existing_users = user_objects.user_set()
    existing_users.get_users({})

    excluded = set(get_mute_users())
    excluded.update(user.user['screen_name']
                    for user in existing_users.users)

    discovered_users = Counter()
    for tweet in tweets.tweets:
        discovered_users.update(tweet.tweet_mentions)

    for user_screen_name in excluded:
        discovered_users.pop(user_screen_name, None)

    # Keep the 30 most mentioned accounts; a plain slice would keep the
    # first 30 encountered, regardless of how often they were mentioned.
    top_users = discovered_users.most_common(30)

    #extra processing
    # NOTE(review): this second connect repeats the one above; kept in case
    # the connection helper relies on it - confirm whether it is needed.
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    now = datetime.datetime.now()
    for screen_name, activity in top_users:
        mongo_app.db.users.insert({
            'screen_name': screen_name,
            'activity': activity,
            'core_user': None,
            'status': None,
            'created_year': int(now.year),
            'created_day': int(now.strftime('%j')),
            'made_user': False
        })

    return ('done')
Esempio n. 22
0
    def make_added_users(self):
        """Turn today's manually added users into community users.

        When "Make Added Users" is pressed:
            1. The screen names of today's ``users`` records with status
               ``Add`` and ``made_user`` False are collected.
            2. The same filter is posted to the user-maker service; the
               outcome is shown as a success or an error message.
            3. The data-collection service is asked to collect the last
               seven days of tweets for those screen names and, after a
               five second pause, the friends of users added today.
        """
        if not st.button('Make Added Users'):
            return

        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        today = datetime.date.today()

        # Used both as the Mongo filter and as the user-maker payload.
        payload = {
            "status": "Add",
            "created_year": int(today.year),
            "created_day": int(today.strftime('%j')),
            "made_user": False
        }

        members = [
            record['screen_name']
            for record in mongo_app.db.users.find(dict(payload))
        ]

        url = "http://usermaker:5000/"
        with st.spinner('Wait making users'):

            session = requests.Session()
            session.trust_env = False

            report = session.post(url, json=payload)

            if (report.status_code == 200):
                st.success('Made users 😉' + ' ' + str(report.text))
            else:
                # A failure must not be rendered as a success box.
                st.error('Please try again 😢')

        #collecting their tweets
        with st.spinner('Collecting tweets...'):

            session = requests.Session()
            session.trust_env = False

            tweets_payload = {
                'days': 7,
                'user_payload': {
                    'user.screen_name': {
                        '$in': members
                    }
                },
                'source': 'add'
            }
            url = "http://datacollection:5010/tweets"

            session.post(url, json=tweets_payload)

            time.sleep(5)

            user_friends_query = {
                '$and': [{
                    'day_added':
                    int(datetime.datetime.now().strftime('%j'))
                }, {
                    'year_added': int(datetime.datetime.now().year)
                }]
            }
            user_payload = {
                'type': 'new',
                'user_friends_payload': user_friends_query,
                'source': 'add'
            }
            url = "http://datacollection:5010/user_friends"

            session.post(url, json=user_payload)
Esempio n. 23
0
def tweets_celery_collector(self, query):
    """Collect recent tweets of the selected users and store them in MongoDB.

    Progress is written to the ``data_collection`` document of the
    ``reports`` collection, under keys suffixed with ``query['source']``.

    Args:
        self: Not used by this function; presumably the bound task instance
            supplied by the task runner - TODO confirm.
        query: Mapping with ``days`` (how many days back the window starts),
            ``source`` (suffix for the report keys) and ``user_payload``
            (filter handed to ``user_set.get_users``).

    Returns:
        ``'No Network'`` when a request to Twitter could not be sent,
        otherwise ``None``.
    """

    #connecting to db
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    today = datetime.date.today()
    source = query['source']
    report_filter = {'report': 'data_collection'}

    #connecting to twitter
    api_keys = connections.twitter_api_keys()
    api_keys.get_existing_keys(no_of_keys=1)
    api = api_keys.keys[0].connect_to_twitter()

    #setting time period: from `days` days ago up to two days ago (midnight)
    todaydt = datetime.datetime(today.year, today.month, today.day, 0, 0, 0)
    startDate = todaydt - datetime.timedelta(days=query['days'])
    endDate = todaydt - datetime.timedelta(days=2)

    #set status in db
    mongo_app.db.reports.update_one(
        report_filter, {
            '$set': {
                f"last_day.{source}": int(today.strftime('%j')),
                f"tweets_status.{source}": 'collecting',
                f"duration.{source}": f"{startDate} - {endDate}"
            }
        })

    #getting users
    users = user_objects.user_set()
    users.get_users(query['user_payload'])

    #collect tweets
    tweets_json = []
    request_no = 0
    for user_no, user in enumerate(users.users, start=1):

        screen_name = user.user['screen_name']
        max_id = None  # None means "newest page"

        while True:
            request_no += 1
            try:
                if max_id is None:
                    page = api.user_timeline(screen_name)
                else:
                    page = api.user_timeline(screen_name, max_id=max_id)
            except tweepy.TweepError as e:
                reason = getattr(e, 'reason', '')
                if isinstance(reason, str) and reason.startswith(
                        'Failed to send request'):
                    return ('No Network')
                # Any other API error: give up on this user instead of
                # re-processing the previous page.
                print('Skipping' + screen_name)
                break

            print(request_no, screen_name, user_no)

            if not page:
                if max_id is None:
                    print('*=*=*=*=  NO TWEETS BY  *=*=*=*=*=', user, user_no)
                break

            for tweet in page:
                if startDate < tweet.created_at < endDate:
                    tweet._json['created_day'] = int(
                        tweet.created_at.strftime('%j'))
                    tweet._json['created_year'] = tweet.created_at.year
                    tweet._json['tweeted_hour'] = int(
                        tweet._json['created_at'][11:13])
                    tweets_json.append(tweet._json)

            if page[-1].created_at <= startDate:
                break

            # max_id is inclusive, so step below the oldest tweet seen;
            # otherwise the same tweet is returned again and the loop never
            # ends for timelines that are entirely newer than startDate.
            print("Last Tweet @", page[-1].created_at,
                  " - fetching some more")
            max_id = page[-1].id - 1

        #updating status in reports
        mongo_app.db.reports.update_one(
            report_filter,
            {'$set': {
                f"tweets_user.{source}": screen_name
            }})

    #removing duplicates (first occurrence of every tweet id wins)
    unique_tweets = {}
    for tweet in tweets_json:
        unique_tweets.setdefault(tweet['id'], tweet)

    #inserting to database
    count = 0
    for tweet in unique_tweets.values():

        tweet['downloaded_day_year'] = int(today.strftime('%j'))
        tweet['downloaded_year'] = int(today.year)
        tweet.pop('_id', None)

        # Only insert tweets that are not stored yet.
        if mongo_app.db.tweets.find_one({'id': tweet['id']},
                                        {'_id': 1}) is None:
            mongo_app.db.tweets.insert_one(tweet)
            count += 1

    #set status in db
    mongo_app.db.reports.update_one(
        report_filter, {
            '$set': {
                f"tweets_status.{source}": 'collected',
                f"quantity.{source}": count
            }
        })
Esempio n. 24
0
def user_friends_celery_collector(self, query):
    """Collect the friend ids of the selected users and store them in MongoDB.

    The index of the last processed user is checkpointed in the
    ``data_collection`` report so that an interrupted run of the same
    ``query['type']`` can resume instead of starting over.

    Args:
        self: Not used by this function; presumably the bound task instance
            supplied by the task runner - TODO confirm.
        query: Mapping with ``type`` (checkpoint tag), ``source`` (suffix
            for the report keys) and ``user_friends_payload`` (filter handed
            to ``user_set.get_users``).

    Returns:
        ``'No Network'`` when a request to Twitter could not be sent,
        otherwise ``None``.
    """

    #connecting to db
    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    today = datetime.date.today()
    source = query['source']
    report_filter = {'report': 'data_collection'}

    #connecting to twitter
    api_keys = connections.twitter_api_keys()
    api_keys.get_existing_keys()

    #set status in db
    mongo_app.db.reports.update_one(
        report_filter, {
            '$set': {
                f"last_day.{source}": int(today.strftime('%j')),
                f"user_friends_status.{source}": 'collecting'
            }
        })

    #getting users
    users = user_objects.user_set()
    users.get_users(query['user_friends_payload'])

    report = mongo_app.db.reports.find_one(report_filter)
    if report is None:
        return

    #checking if it stopped in between
    checkpoint = report.get('friends_user') or {}
    user_no = 0
    if checkpoint.get('type') == query['type']:
        last_done = checkpoint.get('user_no', 0)
        # A finished run (last user reached) or a run that never got past
        # the first user restarts at 0; otherwise step one user back.
        if last_done not in (0, len(users.users) - 1):
            user_no = last_done - 1

    key_no = 0
    api = api_keys.keys[key_no].connect_to_twitter_no_wait()
    while user_no < len(users.users):

        screen_name = users.users[user_no].user['screen_name']

        try:
            friends = [{
                'id': friend_id
            } for friend_id in tweepy.Cursor(
                api.friends_ids, screen_name=screen_name).items()]

            #Printing Status
            print(len(friends), screen_name, user_no)

            #updating dB
            mongo_app.db.community.update_one(
                {'_id': users.user_ids[user_no]},
                {'$set': {
                    'friends_id': friends
                }})

            #writing status to reports
            mongo_app.db.reports.update_one(
                report_filter, {
                    '$set': {
                        'friends_user': {
                            'screen_name': screen_name,
                            'user_no': user_no,
                            'type': query['type']
                        }
                    }
                })

            user_no += 1

        except tweepy.RateLimitError:

            # Wait, then retry the same user with the next API key.
            time.sleep(60)

            key_no = (key_no + 1) % (len(api_keys.keys))
            api = api_keys.keys[key_no].connect_to_twitter_no_wait()
            print(key_no)

        except Exception as e:

            reason = getattr(e, 'reason', '')
            if isinstance(reason, str) and reason.startswith(
                    'Failed to send request'):
                return ('No Network')

            # Every other failure skips the user; without advancing here an
            # error such as a protected account would be retried forever.
            print('skipping ' + screen_name)
            print(e)
            user_no += 1

    #set status in db
    mongo_app.db.reports.update_one(
        report_filter,
        {'$set': {
            f"user_friends_status.{source}": 'collected'
        }})
Esempio n. 25
0
    def finilize(self):
        """Render the "Finilize Users" section and trigger the follow-up jobs.

        When the button is pressed this posts, in order, to the user-maker
        service, the mute service and the two data-collection endpoints
        (tweets, then user friends) for the users stored today with status
        ``Discover`` and ``made_user`` set to False. Failed calls to the
        first two services are shown as errors.
        """

        st.write('---')
        st.write('## Finilize Users')
        st.write('Finilize the actions taken above.')
        if not st.button('Finilize Users'):
            return

        today = datetime.date.today()

        # Users added today that have not been "made" yet. The same document
        # is used as the Mongo filter and as the user-maker payload.
        new_users = {
            "status": "Discover",
            "created_year": int(today.year),
            "created_day": int(today.strftime('%j')),
            "made_user": False
        }

        #getting recently added users
        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()
        members = [
            doc['screen_name'] for doc in mongo_app.db.users.find(new_users)
        ]

        # One session for all calls; trust_env=False ignores proxy settings
        # taken from the environment.
        session = requests.Session()
        session.trust_env = False

        #making users
        with st.spinner('Making Users...'):

            report = session.post("http://usermaker:5000/", json=new_users)

            if report.status_code == 200:
                st.success('Made users 😉' + ' ' + str(report.text))
            else:
                st.error('Please try again 😢')

        #muting users
        with st.spinner('Muting Users...'):

            report = session.post("http://muteusers:5040/", json={})

            if report.status_code == 200:
                st.success('Muted Users 😶')
            else:
                st.error('Please try again 😢')

        #collecting their tweets
        with st.spinner('Collecting tweets...'):

            tweets_payload = {
                'days': 7,
                'user_payload': {
                    'user.screen_name': {
                        '$in': members
                    }
                },
                'source': 'discover'
            }
            session.post("http://datacollection:5010/tweets",
                         json=tweets_payload)

            time.sleep(5)

            now = datetime.datetime.now()
            user_payload = {
                'type': 'new',
                'user_friends_payload': {
                    '$and': [{
                        'day_added': int(now.strftime('%j'))
                    }, {
                        'year_added': int(now.year)
                    }]
                },
                'source': 'discover'
            }
            session.post("http://datacollection:5010/user_friends",
                         json=user_payload)
Esempio n. 26
0
    def get_details(self):
        """Render the weight sliders and persist them as the preprocess report.

        The five slider values are stored on ``self``. Pressing "Done"
        writes them to the ``preprocess`` document of the ``reports``
        collection (created when missing). The currently stored weights are
        then listed, if such a document exists.
        """

        st.write(
            'Please **adjust** the below **values** to get **best Clusters**')

        def weight_slider(label, default):
            # All weights share the same range and step.
            return st.slider(label,
                             min_value=0.000,
                             max_value=10000.000,
                             value=default,
                             step=0.001)

        self.tweet_weight = weight_slider('Weight for plane Tweet', 0.010)
        self.tweet_mention_weight = weight_slider(
            'Weight for Tweet with mention', 0.100)
        self.retweet_weight = weight_slider('Weight for retweet', 1.000)
        self.quote_weight = weight_slider('Weight for Quote', 10.000)
        self.reply_weight = weight_slider('Weight for reply', 100.000)

        st.write(
            'During testing the default weights were giving good results,')
        st.write(
            'If you do not get good Clusters please adjust above weights.')

        mongo_app = connections.mongo()
        mongo_app.connect_to_mongo()

        report_filter = {'report': 'preprocess'}

        if st.button('Done'):
            # Upsert: updates the existing report or creates it on first use.
            mongo_app.db.reports.update_one(report_filter, {
                '$set': {
                    'tweet_weight': self.tweet_weight,
                    'tweet_mention_weight': self.tweet_mention_weight,
                    'retweet_weight': self.retweet_weight,
                    'quote_weight': self.quote_weight,
                    'reply_weight': self.reply_weight
                }
            },
                                            upsert=True)

        report = mongo_app.db.reports.find_one(report_filter)
        if report is None:
            # Nothing has been saved yet, so there is nothing to show.
            return

        descriptions = (
            ('tweet_weight',
             "this is the amount of bond strength gained by the followers of, if the user tweets a simple tweet."
             ),
            ('tweet_mention_weight',
             "this is the amount of bond strength gained by a twitter user, if a user mentions the twitter user in his tweet."
             ),
            ('retweet_weight',
             "this is the amount of bond strength gained by a twitter user, if a user retweets a tweet of a twitter user."
             ),
            ('quote_weight',
             "this is the amount of bond strength gained by a twitter user, if a user quotes a tweet of a twitter user."
             ),
            ('reply_weight',
             "this is the amount of bond strength gained by a twitter user, if a user replies to a tweet of a twitter user."
             ),
        )
        for key, description in descriptions:
            if key in report:
                st.write('- ' + f"{report[key]}, {description}")
def discover(self, discover_query):
    """Tier, thread and flag the last week's tweets of one cluster.

    For every user of the cluster this loads the tweets of the last seven
    days, assigns each tweet a tier (1: mostly mentions users of this
    cluster, 2: users of other clusters, 3: anybody else) and stores the
    most-mentioned account per tier as ``bond_stats`` on the community
    document. The tweets are then grouped into threads with DBSCAN, and each
    tweet is updated in the ``tweets`` collection with its tier, thread
    number, cluster number and a ``discover`` flag. The flag is True for
    threads that start at a timestamp where unusually many threads start
    (more than mean + 2 * std of threads per timestamp within the tier).

    Args:
        self: Not used by this function; presumably the bound task instance
            supplied by the task runner - TODO confirm.
        discover_query: Mapping with the ``cluster_number`` to process.

    Returns:
        ``'No tweets - <cluster_number>'`` when no tweets were found,
        otherwise ``None``.
    """

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    cluster_number = discover_query['cluster_number']

    users_t1 = user_objects.user_set()
    users_t1.get_users({'cluster_number': cluster_number})
    tier1_names = {member.user['screen_name'] for member in users_t1.users}

    users_t2 = user_objects.user_set()
    users_t2.get_users({'cluster_number': {'$ne': cluster_number}})
    tier2_names = {member.user['screen_name'] for member in users_t2.users}

    #setting Dates
    today = datetime.date.today()
    seven_days_back = today - datetime.timedelta(days=7)

    tweets_t1 = tweet_objects.tweet_set()
    vector_members = set()

    start = 0
    for user in users_t1.users:

        for query in _discover_window_queries(user.user['id'],
                                              seven_days_back, today):
            tweets_t1.get_tweets(query)

        end = len(tweets_t1.tweets)
        tweets_t1.preprocess_tweets_range(start, end)

        # Mention counters of this user, one dict per tier (index 0 = tier 1).
        mention_counts = ({}, {}, {})

        vector_members.add(user.user['screen_name'])

        for tweet in tweets_t1.tweets[start:end]:

            tier_hits = [0, 0, 0]
            for member in tweet.tweet_mentions:

                vector_members.add(member)

                if member in tier1_names:
                    slot = 0
                elif member in tier2_names:
                    slot = 1
                else:
                    slot = 2

                tier_hits[slot] += 1
                counts = mention_counts[slot]
                counts[member] = counts.get(member, 0) + 1

            # The author belongs to this cluster, which counts as a tier-1 hit.
            tier_hits[0] += 1

            # index() returns the first maximum, so ties favour the lower tier.
            tweet.tweet['tier'] = tier_hits.index(max(tier_hits)) + 1

        bond_stats = {
            f'tier{number}': _top_interaction(counts)
            for number, counts in enumerate(mention_counts, start=1)
        }
        mongo_app.db.community.update_one(
            {'user.screen_name': user.user['screen_name']},
            {'$set': {
                'bond_stats': bond_stats
            }})
        start = end

    vector_template = list(vector_members)

    # One vector per tweet: a time-based weight for every account involved in
    # the tweet (0 for the others), followed by the scaled day and hour.
    tweet_vectors = []
    for tweet in tweets_t1.tweets:

        day = float(tweet.tweet['created_day'] / 4)
        hour = float(0.25 * tweet.tweet['tweeted_hour'] / 24)

        involved = set(tweet.tweet_mentions)
        involved.add(tweet.tweet_user_screen_name)

        vector = [(1 + day + hour) if member in involved else 0
                  for member in vector_template]
        vector.append(day)
        vector.append(hour)

        tweet_vectors.append(vector)

    if len(tweet_vectors) == 0:
        return (f'No tweets - {cluster_number}')

    #clustering
    df = pd.DataFrame(tweet_vectors)
    clustring = DBSCAN(eps=0.25, min_samples=1).fit(df)

    for tweet, label in zip(tweets_t1.tweets, clustring.labels_):
        tweet.tweet['thread_number'] = label

    tweet_df = pd.DataFrame([tweet.tweet for tweet in tweets_t1.tweets])

    columns_to_remove = [
        '_id', 'id_str', 'truncated', 'entities', 'source',
        'in_reply_to_status_id', 'in_reply_to_status_id_str',
        'in_reply_to_user_id', 'in_reply_to_user_id_str',
        'in_reply_to_screen_name', 'geo', 'coordinates', 'place',
        'contributors', 'is_quote_status', 'favorited', 'retweeted', 'lang',
        'downloaded_day_year', 'downloaded_year', 'extended_entities',
        'possibly_sensitive', 'quoted_status_id', 'quoted_status_id_str',
        'quoted_status', 'retweeted_status'
    ]

    # Not every tweet carries every field, so only drop what is present.
    present = [
        column for column in columns_to_remove if column in tweet_df.columns
    ]
    tweet_df.drop(present, axis=1, inplace=True)

    tweet_df['screen_name'] = tweet_df['user'].apply(
        lambda x: x['screen_name'])
    tweet_df['friends_count'] = tweet_df['user'].apply(
        lambda x: x['friends_count'])
    tweet_df['followers_count'] = tweet_df['user'].apply(
        lambda x: x['followers_count'])
    tweet_df.drop('user', axis=1, inplace=True)

    # Timestamp with hour resolution, rebuilt from year, day of year and hour.
    tweet_df['tweeted_at'] = tweet_df.apply(
        lambda row: datetime.datetime(row.created_year, 1, 1, row.tweeted_hour)
        + datetime.timedelta(row.created_day - 1),
        axis=1)

    tweet_df['tweet_url'] = tweet_df.apply(
        lambda row: f'https://twitter.com/{row.screen_name}/status/{row.id}',
        axis=1)

    for tier in (1, 2, 3):

        temp_df = tweet_df[tweet_df['tier'] == tier]

        if len(temp_df) == 0:
            print(
                f'{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number},{tier}-{cluster_number}'
            )
            # Nothing to write back for an empty tier.
            continue

        print(
            f'{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*,{tier}-{cluster_number}*'
        )

        # Earliest tweet of every thread, then the number of threads that
        # start at each timestamp.
        first_seen = temp_df.groupby('thread_number')['tweeted_at'].min()
        threads_per_stamp = first_seen.value_counts()
        threshold = (threads_per_stamp.mean() +
                     2 * threads_per_stamp.std())
        burst_stamps = set(
            threads_per_stamp[threads_per_stamp > threshold].index)

        for thread_number, time_df in temp_df.groupby('thread_number'):

            is_discover = first_seen[thread_number] in burst_stamps

            for tweet_id, tweet_tier in zip(time_df['id'], time_df['tier']):
                # int() converts numpy integers to plain ints before they
                # are sent to MongoDB.
                mongo_app.db.tweets.update_one({'id': int(tweet_id)}, {
                    '$set': {
                        'tier': int(tweet_tier),
                        'thread_number': int(thread_number),
                        'discover': is_discover,
                        'cluster_number': cluster_number
                    }
                })


def _discover_window_queries(user_id, window_start, window_end):
    """Return the tweet queries covering (window_start, window_end] for a user.

    Days are stored as day-of-year numbers, so a window that crosses a year
    boundary needs one query per year.
    """
    after_start = {
        '$and': [{
            'created_day': {
                '$gt': int(window_start.strftime('%j'))
            }
        }, {
            'created_year': window_start.year
        }]
    }
    up_to_end = {
        '$and': [{
            'created_day': {
                '$lte': int(window_end.strftime('%j'))
            }
        }, {
            'created_year': window_end.year
        }]
    }

    if window_start.year == window_end.year:
        return [{'$and': [{'user.id': user_id}, after_start, up_to_end]}]

    return [{
        '$and': [{
            'user.id': user_id
        }, after_start]
    }, {
        '$and': [{
            'user.id': user_id
        }, up_to_end]
    }]


def _top_interaction(counts):
    """Return the account with the highest count as a bond_stats entry."""
    if not counts:
        return {'screen_name': '', 'interactions': 0}

    # key=counts.get ranks by interaction count; a bare max() would pick the
    # alphabetically last screen name instead.
    screen_name = max(counts, key=counts.get)
    return {'screen_name': screen_name, 'interactions': counts[screen_name]}
def main():
    """Run the data-collection scheduler loop forever.

    Every 30 minutes the ``data_collection`` report is read. Once per day,
    after the configured ``scheduled_hour``, the tweet and user-friends
    collections are started and tweets older than 30 days are purged. On the
    other wake-ups a collection that is still marked ``collecting`` but has
    not moved on to another user since the previous wake-up is posted again.
    """

    mongo_app = connections.mongo()
    mongo_app.connect_to_mongo()

    iteration = 0

    # Set when a collection is started; the retry branch re-uses them.
    session = None
    tweets_payload = None
    user_payload = None

    # User each collector was working on at the previous wake-up.
    old_user_tweets = None
    old_user_friends = None

    while True:

        time.sleep(60 * 30)

        now = datetime.datetime.now()
        hour = int(now.hour)
        day = int(now.strftime('%j'))

        report_doc = mongo_app.db.reports.find_one(
            {'report': 'data_collection'})
        if report_doc is None:
            continue

        if ((report_doc['last_day']['scheduler'] != day)
                and (report_doc['scheduled_hour'] < hour)):

            session = requests.Session()
            session.trust_env = False

            # Days since the last scheduled run plus two, capped at a week.
            lastDate = datetime.datetime(
                report_doc['year']['scheduler'], 1,
                1) + datetime.timedelta(report_doc['last_day']['scheduler'] -
                                        1)
            days = min((now - lastDate).days + 2, 7)

            tweets_payload = {
                'days': days,
                'user_payload': {},
                'source': 'scheduler'
            }
            session.post("http://datacollection:5010/tweets",
                         json=tweets_payload)

            time.sleep(5)

            user_payload = {
                'type': 'old',
                'user_friends_payload': {},
                'source': 'scheduler'
            }
            session.post("http://datacollection:5010/user_friends",
                         json=user_payload)

            # Purge tweets older than 30 days. created_day is stored as an
            # integer, so the bound must be an integer as well; a string
            # bound never matches integer values.
            month_back = now - datetime.timedelta(days=30)
            month_back_day = int(month_back.strftime('%j'))
            month_back_year = month_back.year

            mongo_app.db.tweets.delete_many(
                {'created_year': {
                    '$lt': month_back_year
                }})

            mongo_app.db.tweets.delete_many({
                '$and': [{
                    'created_year': month_back_year
                }, {
                    'created_day': {
                        '$lt': month_back_day
                    }
                }]
            })

        elif iteration != 0 and session is not None:

            # Only reachable after this process started a collection itself;
            # otherwise there is no payload to send again.
            try:

                if ((report_doc['last_day']['scheduler'] == day)
                        and (report_doc['tweets_status']['scheduler']
                             == 'collecting')
                        and (report_doc['tweets_user']['scheduler']
                             == old_user_tweets)):

                    session.post("http://datacollection:5010/tweets",
                                 json=tweets_payload)

                if ((report_doc['last_day']['scheduler'] == day)
                        and (report_doc['user_friends_status']['scheduler']
                             == 'collecting')
                        and (report_doc['friends_user']['screen_name']
                             == old_user_friends)):

                    session.post("http://datacollection:5010/user_friends",
                                 json=user_payload)

            except (KeyError, requests.RequestException) as e:
                # A missing report field or a failed request must not stop
                # the scheduler; the check runs again at the next wake-up.
                print('retry check skipped:', repr(e))

        old_user_tweets = report_doc.get('tweets_user', {}).get('scheduler')
        old_user_friends = report_doc.get('friends_user',
                                          {}).get('screen_name')
        iteration += 1