Example #1
def get_users_data(users):
    # counter used to rotate between credentials files
    safety = 0
    # getting access file
    access_file, safety = get_access_file(safety)
    # creating connection with Twitter API
    client = get_twitter_client(access_file)
    counter = 0
    for user in users:
        if counter % 50 == 0:
            print(str(counter) + " DONE")
        counter += 1
        try:
            # if the user's details already exist in the database, skip the user
            users_collection.find_one({"_id": user})['Followers Count']
        except (TypeError, KeyError):
            # otherwise, try to download them
            try:
                account = client.get_user(user)
                user_details = get_user_details(account)
                average_per_day, average_per_hour = get_user_tweets_number(
                    account, client)
                user_details['Tweets per day'] = average_per_day
                user_details['Tweets per hour'] = average_per_hour
                query = {"_id": user}
                # if the user exists in the database but without details,
                # append the details
                if users_collection.find_one({"_id": user}):
                    values = {"$set": user_details}
                    users_collection.update_one(query, values)
                # otherwise insert new user
                else:
                    users_collection.insert_one(user_details)
            except tweepy.TweepError as e:
                # if Twitter API limit is reached, change access file
                if e.response.text == '{"errors":[{"message":"Rate limit exceeded","code":88}]}':
                    print(e.response.text)
                    print('CHANGING ACCESS FILE TO ' + str(safety) + '!')
                    access_file, safety = get_access_file(safety)
                    client = get_twitter_client(access_file)
                    # try to download with new access file
                    try:
                        user_details = get_user_details(account)
                        average_per_day, average_per_hour = get_user_tweets_number(
                            account, client)
                        user_details['Tweets per day'] = average_per_day
                        user_details['Tweets per hour'] = average_per_hour
                        query = {"_id": user}
                        # if the user exists in the database but without details,
                        # append the details
                        if users_collection.find_one({"_id": user}):
                            values = {"$set": user_details}
                            users_collection.update_one(query, values)
                        # otherwise insert new user
                        else:
                            users_collection.insert_one(user_details)
                    except tweepy.TweepError:
                        pass
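Examples #1 and #2 repeat the same download-and-upsert block before and after a credentials change. A minimal sketch of how that block could be factored out; the helper name is hypothetical, while users_collection, get_user_details, and get_user_tweets_number are the names used above:

def upsert_user_details(client, account, user):
    # hypothetical helper mirroring the repeated block in Example #1
    user_details = get_user_details(account)
    per_day, per_hour = get_user_tweets_number(account, client)
    user_details['Tweets per day'] = per_day
    user_details['Tweets per hour'] = per_hour
    query = {"_id": user}
    if users_collection.find_one(query):
        # the user exists without details: append them
        users_collection.update_one(query, {"$set": user_details})
    else:
        # otherwise insert a new user document
        users_collection.insert_one(user_details)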
Example #2
def get_single_user(screen_name):

    # counter used to rotate between credentials files
    safety = 0
    # getting access file
    access_file, safety = get_access_file(safety)
    # creating connection with Twitter API
    client = get_twitter_client(access_file)
    try:
        # if the user's details already exist in the database, read them
        users_collection.find_one({"_id": screen_name})['Verified']
        user_details = users_collection.find_one({"_id": screen_name})
    except (TypeError, KeyError):
        # otherwise download the user details
        account = client.get_user(screen_name)
        user_details = get_user_details(account)
        average_per_day, average_per_hour = get_user_tweets_number(
            account, client)
        user_details['Tweets per day'] = average_per_day
        user_details['Tweets per hour'] = average_per_hour
        query = {"_id": screen_name}
        # if the user exists in the database but without details,
        # append the details
        if users_collection.find_one({"_id": screen_name}):
            values = {"$set": user_details}
            users_collection.update_one(query, values)
        # otherwise insert new user
        else:
            users_collection.insert_one(user_details)
    return user_details
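A short usage sketch, assuming the MongoDB collection and credentials files above are in place; the screen name is hypothetical:

# the first call downloads via the API and caches the details in MongoDB;
# a later call with the same name is served from the database
details = get_single_user('some_screen_name')  # hypothetical screen name
print(details['Tweets per day'])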
Example #3
def get_user_timeline(user):

    fname = 'temp_user_timeline_{}.jsonl'.format(user)
    client = get_twitter_client()
    print("Downloading timeline...")
    try:
        with open(fname, 'w') as f:
            for page in Cursor(client.user_timeline,
                               screen_name=user,
                               count=200).pages(16):
                sys.stdout.write(next(spinner))  # 'spinner' is assumed to be an itertools.cycle of characters
                sys.stdout.flush()
                sys.stdout.write('\b')
                for status in page:
                    f.write(json.dumps(status._json) + '\n')
            print("Done.")
        with open('temp_user.json', 'w') as f:
            t = datetime.now()
            t = t.strftime("%A, %d. %B %Y %I:%M%p")
            data_user = {'user': user, 'date': t}
            f.write(json.dumps(data_user))
        return True
    except Exception as e:
        print("Error:")
        print(e)
        return False
Example #4
def get_all_tweets(screen_name):
    client = get_twitter_client()
    alltweets = []
    new_tweets = client.user_timeline(screen_name=screen_name, count=200)
    alltweets.extend(new_tweets)
    oldest = alltweets[-1].id - 1
    for i in range(0, 5):
        new_tweets = client.user_timeline(screen_name=screen_name,
                                          count=200,
                                          max_id=oldest)
        alltweets.extend(new_tweets)
        oldest = alltweets[-1].id - 1
    jsonResults = {
        "screen_name": client.get_user(uscreen_name=screen_name).screen_name,
        "tweets": []
    }

    for tweet in alltweets:
        tweet_data = {
            "id": tweet.id_str,
            "coordinates": tweet.coordinates,
            "text": tweet.text,
            "retweet_count": tweet.retweet_count,
            "created_at": tweet.created_at.isoformat(),
            "favorite_count": tweet.favorite_count,
        }
        jsonResults["tweets"].append(tweet_data)
        print(len(jsonResults["tweets"]))
    return jsonResults
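A brief usage sketch for Example #4; the screen name is hypothetical. The function gathers the first page plus five more pages of up to 200 tweets each, so roughly 1,200 tweets at most:

results = get_all_tweets('some_screen_name')  # hypothetical screen name
print(results['screen_name'], len(results['tweets']))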
Example #5
def get_followers(username, max_followers, output_file):
    print("Getting <{}> followers".format(username))
    client = get_twitter_client()
    max_pages = math.ceil(max_followers / 5000)
    count = 0
    json_file = output_file.split('.')[0] + '_full.json'
    with open(json_file, 'w') as json_output:
        with progressbar.ProgressBar(
                max_value=progressbar.UnknownLength) as bar:
            for followers in Cursor(client.followers_ids,
                                    screen_name=username).pages(max_pages):
                for chunk in paginate(followers, 100):
                    try:
                        users = client.lookup_users(user_ids=chunk)
                        for user in users:
                            user_info = user._json
                            screen_name = user_info['screen_name']
                            with open(output_file, 'a') as txt_output:
                                txt_output.write(screen_name + '\n')
                            json_output.write(json.dumps(user._json) + '\n')
                            count += 1
                            bar.update(count)
                    except tweepy.TweepError:
                        # skip chunks that fail to resolve (e.g. suspended accounts)
                        pass
                if len(followers) == 5000:
                    time.sleep(60)
    print("<{}> followers completed".format(username))
    time.sleep(60)
Example #6
def check_by_tweets(screen_name, access_file):

    client = get_twitter_client(access_file)

    tweets = []
    positive = []
    negative = []

    ids = []
    dates = []

    check = tweets_collection.count_documents({"_id": screen_name})

    if check == 1:
        print('Already in database!')
    else:
        try:
            current_user = client.get_user(screen_name)

            if not current_user.protected:
                for page in Cursor(client.user_timeline,
                                   screen_name=screen_name,
                                   count=200).pages(16):
                    for status in page:
                        ids.append(str(status.id))
                        dates.append(status.created_at)
                        tweets.append(status.text)

                tweet_data = {
                    "_id": screen_name,
                    screen_name: {
                        "_id": ids,
                        "Text": tweets,
                        "Created at": dates
                    }
                }
                tweets_collection.insert_one(tweet_data)
            else:
                tweet_data = {"_id": screen_name}
                tweets_collection.insert_one(tweet_data)

        except tweepy.error.TweepError:
            print(screen_name + ' DOES NOT EXIST!')

    for tweet in tweets:
        result = check_tweet(tweet)

        if result:
            positive.append(1)
        else:
            negative.append(1)

    if len(positive) > len(negative):
        return True
    else:
        return False
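Since positive and negative only ever accumulate the value 1, the final majority vote reduces to a single tally; a minimal equivalent sketch, assuming check_tweet returns a truthy value for positive tweets:

def majority_positive(tweets):
    # equivalent to comparing len(positive) with len(negative) above
    score = sum(1 if check_tweet(tweet) else -1 for tweet in tweets)
    return score > 0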
Example #7
def pull_timeline(user):
    client = get_twitter_client()

    file_name = "tweets_{}.json".format(user)

    with open(file_name, 'w') as f:
        for page in Cursor(client.user_timeline, screen_name=user,
                           count=200).pages():
            for status in page:
                f.write(json.dumps(status._json) + '\n')
    return file_name
Example #8
def get_timeline():
    user = sys.argv[1]
    client = get_twitter_client()
    m = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )

    # store some of the recent tweets in a file in JSON format

    fname = "user_timeline_{}.json1".format(user)
    with open(fname, 'w') as f:
        for page in Cursor(client.user_timeline, screen_name=user,
                           count=50).pages(4):
            for status in page:
                f.write(json.dumps(status._json) + "\n")
    with sqlite3.connect("tweet.db") as con:
        cur = con.cursor()
        cur.execute("DROP TABLE tweeturl")
        cur.execute(
            "CREATE TABLE tweeturl(tid INT PRIMARY KEY NOT NULL,url TEXT NOT NULL,created_at TEXT)"
        )
        con.commit()
    # read the file back, extracting the tweet id, URL, and creation time into the database
    with open(fname, "r") as tweets_file:
        for line in tweets_file:
            try:
                tweet = json.loads(line.strip())
                if 'text' in tweet:
                    u = tweet['user']['id']
                    print(u)
                    i = tweet['id']
                    t = tweet['created_at']
                    p = m.findall(tweet['text'])
                    try:
                        with sqlite3.connect("tweet.db") as con:
                            cur = con.cursor()
                            cur.execute(
                                "INSERT INTO tweeturl(tid,url,created_at) VALUES(?,?,?)",
                                (i, p[0], t))
                            con.commit()
                            print("Record successfully added")
                    except (sqlite3.Error, IndexError):
                        # either the tweet carried no URL or the insert failed;
                        # the connection context manager rolls back on error
                        print("error in insert operation")
            except ValueError:
                # skip lines that are not valid JSON
                continue
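A minimal read-back check, assuming the run above populated tweet.db:

import sqlite3

with sqlite3.connect("tweet.db") as con:
    # print the first few stored rows
    for row in con.execute("SELECT tid, url, created_at FROM tweeturl LIMIT 5"):
        print(row)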
Example #9
def get_single_user(screen_name, access_file):

    client = get_twitter_client(access_file)

    account = client.get_user(screen_name)

    users_data = get_user_data(account)

    try:
        users_collection.insert_one(users_data)
    except pymongo.errors.DuplicateKeyError:
        pass

    return users_data
Example #10
def twitter_get_timeline2(user_list, numbers_of_tweets, dir_name):
    client = get_twitter_client()
    n = numbers_of_tweets

    dirname = "tweets/{}".format(dir_name)
    os.makedirs(dirname, mode=0o755, exist_ok=True)

    for x in range(0, len(user_list), 20):
        chunks = user_list[x:x + 20]
        for entry in chunks:
            fname = "{}/user_timeline_{}.jsonl".format(dirname, entry)
            with open(fname, 'w') as f:
                for tweet in Cursor(client.user_timeline, screen_name=entry).items(n):
                    f.write(json.dumps(tweet._json) + "\n")
            print(entry)
Example #11
def getting_timeline(user):
    client = get_twitter_client()  # seems to be the rate-limiting step

    fname = "user_timeline_{}.jsonl".format(user)

    with open(fname, 'w') as f:
        for page in Cursor(
                client.user_timeline, screen_name=user,
                count=200).pages(16):  # up to 3,200 tweets, the API's timeline limit
            counter = 0
            for status in page:
                # keep every fourth tweet to collect a smaller subsection
                if counter % 4 == 0:
                    f.write(json.dumps(status._json) + "\n")
                counter += 1
Example #12
def user_search(user_list):
    client = get_twitter_client()

    # load user IDs
    dataset = pd.read_csv(user_list, encoding="ISO-8859-1")
    ids = list(dataset.iloc[:, 1])

    # the lookup_users method of the Twitter API returns data on up to
    # 100 ids per call; 'users' collects the JSON data for every user listed
    users = []
    for x in range(0, len(ids), 100):
        chunk = ids[x:x + 100]
        for profile in client.lookup_users(user_ids=chunk):
            users.append(profile._json)
            print(profile.id)
    return users
Example #13
def pull_timeline(user_name):
    client = get_twitter_client()

    file_name = "tweets_{}.json".format(user_name)

    path = os.path.join(root, file_name)

    if user_name not in handle_dict:
        print("You're going to want to add this to handle_file_dict:\n")
        print("'{}' : '{}'".format(user_name, file_name))

    with open(path, 'w') as f:
        for page in Cursor(client.user_timeline,
                           screen_name=user_name,
                           tweet_mode='extended',
                           count=200).pages():
            for status in page:
                f.write(json.dumps(status._json) + '\n')
    return file_name
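With tweet_mode='extended', each stored status carries its text in a full_text field instead of text; a minimal read-back sketch, using a hypothetical output file:

import json

with open("tweets_example_user.json") as f:  # hypothetical pull_timeline output
    status = json.loads(next(f))
    print(status.get("full_text", status.get("text")))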
Example #14
def get_links(database, collection):
    api = get_twitter_client()
    client = MongoClient()
    db = client[database]
    col = db[collection]

    for user in col.find(no_cursor_timeout=True):
        current_user = api.get_user(user['_id'])
        friends = current_user.friends()
        for friend in friends:
            for existing_user in col.find():
                if friend.screen_name == existing_user['_id']:
                    print("Creating link between " + user['_id'] + " and " +
                          friend.screen_name)
                    new_link = LinksHS(user_screen_name=user['_id'],
                                       user_id=user['id_str'],
                                       friend_screen_name=friend.screen_name,
                                       friend_id=friend.id_str)
                    new_link.save()
        time.sleep(61)
Example #15
def get_all_tweets(user_id):
    client = get_twitter_client()
    alltweets = []
    new_tweets = client.user_timeline(user_id=user_id, count=200)
    alltweets.extend(new_tweets)
    oldest = alltweets[-1].id - 1
    for i in range(0, 5):
        new_tweets = client.user_timeline(user_id=user_id, count=200, max_id=oldest)
        alltweets.extend(new_tweets)
        oldest = alltweets[-1].id - 1
    '''
    #for csv writing:
    outtweets = [
        [tweet.coordinates, tweet.text.encode("utf-8"), tweet.favorited, tweet.place, tweet.id_str, tweet.created_at,
         tweet.user.location.encode("utf-8")] for tweet in alltweets]
    '''
    # fetch the profile once instead of once per field
    profile = client.get_user(user_id=user_id)
    jsonResults = {
        "screen_name": profile.screen_name,
        "total_tweets": len(alltweets),
        "followers_count": profile.followers_count,
        "friends_count": profile.friends_count,
        "total_favorite_count": profile.favourites_count,
        "tweets": []
    }

    for tweet in alltweets:
        tweet_data = {
            "id": tweet.id_str,
            "coordinates": tweet.coordinates,
            # keep the text as str: bytes are not JSON-serializable
            "text": tweet.text,
            "retweet_count": tweet.retweet_count,
            "created_at": tweet.created_at.isoformat(),
            "favorite_count": tweet.favorite_count,
        }
        jsonResults["tweets"].append(tweet_data)

    with open('/Users/Eric/Documents/EE695/specialProject/jsonFiles/user_timelines/%s_tweets.json' % user_id, 'w') as f:
        f.write(json.dumps(jsonResults, indent=4))
Example #16
def get_number(screen_name, access_file):

    client = get_twitter_client(access_file)

    isProtected = client.get_user(screen_name).protected

    max_pages = math.ceil(MAX_FRIENDS / 5000)

    # get data for a given user

    group1 = set()
    group2 = set()
    people = set()

    counter = 0
    if not isProtected:
        for followers in Cursor(client.followers_ids,
                                screen_name=screen_name,
                                count=1500).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    people.add(json.dumps(user._json))
                    group1.add(json.dumps(user._json))
            counter += len(followers)
        for friends in Cursor(client.friends_ids,
                              screen_name=screen_name,
                              count=1500).pages(max_pages):
            for chunk in paginate(friends, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    people.add(json.dumps(user._json))
                    group2.add(json.dumps(user._json) + "\n")
            counter += len(friends)

    return len(people)

def paginate(items, n):
    """Generate n-sized chunks from items."""
    for i in range(0, len(items), n):
        yield items[i:i + n]
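
A quick demonstration of the helper:

# three-item chunks, with a shorter final chunk
print(list(paginate([1, 2, 3, 4, 5, 6, 7], 3)))
# [[1, 2, 3], [4, 5, 6], [7]]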


if __name__ == '__main__':
    if len(sys.argv) != 2:
        usage()
        sys.exit(1)
    screen_name = sys.argv[1]
    client = get_twitter_client()
    dirname = "users/{}".format(screen_name)
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    try:
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except OSError:
        print("Directory {} already exists".format(dirname))
    except Exception as e:
        print("Error while creating directory {}".format(dirname))
        print(e)
        sys.exit(1)

    # get followers for a given user
    fname = "users/{}/followers.jsonl".format(screen_name)
    with open(fname, 'w') as f:
        for followers in Cursor(client.followers_ids,
                                screen_name=screen_name).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    f.write(json.dumps(user._json) + "\n")
Example #18
def fetch_tweets(kwd, since_id, channel, redis_conf):
    """

    :param kwd:
    :param since_id:
    :param channel:
    :param redis_conf:
    :return:
    """
    r = redis_conf['cursor']
    key = redis_conf['key']

    api, credential_id = get_twitter_client(r, key)
    if not api:
        logger.info(f"{credential_id} failed ...using another one ...")
        api, credential_id = get_twitter_client(r, key)

    keyword = kwd['kwd']
    keyword = f'"{keyword} "' + config.get('FETCHER', 'FILTER')

    page_remaining = int(config.get('FETCHER', 'PAGE_LIMIT'))
    tweets_cursor = Cursor(api.search,
                           q=keyword,
                           count=100,
                           since_id=since_id,
                           tweet_mode='extended').pages(page_remaining)
    page_index = 0
    retry = 0
    t_id = 0
    _sleep = 0
    sleep_delay = int(config.get('FETCHER', 'SLEEP'))
    retry_limit = int(config.get('FETCHER', 'RETRY_LIMIT'))

    while True:
        try:
            print(kwd, page_index)
            tweets, t_id = process_page(tweets_cursor.next(), kwd, page_index)
            feed_saver_new_keyword_tweets(channel, tweets)
            page_index += 1
            page_remaining = int(config.get('FETCHER',
                                            'PAGE_LIMIT')) - page_index
            # sleep(1)

        except StopIteration:
            if page_index == 0:
                # No Tweets Found
                data = {'status': 404, 'k_id': kwd['k_id']}
                feed_saver_new_keyword_tweets(channel, data)
            else:
                # last packet for this kwd so that saver can update scheduled_on
                data = {'status': 202, 'k_id': kwd['k_id']}
                feed_saver_new_keyword_tweets(channel, data)

            # Change credential & lpush current credential id
            r.lpush(key, credential_id)
            return True

        except TweepError as error:
            logger.error(
                f"Tweepy Exception occurred for credential id {credential_id} : {error}"
            )
            # Change credential & lpush current credential id
            r.lpush(key, credential_id)
            retry += 1
            if retry <= retry_limit:
                logger.info(f"Retrying for keyword {kwd['kwd']}")
                _sleep += sleep_delay
                sleep(_sleep)
                api, credential_id = get_twitter_client(r, key)
                tweets_cursor = Cursor(
                    api.search,
                    q=keyword,
                    count=100,
                    since_id=since_id,
                    max_id=t_id,
                    tweet_mode='extended').pages(page_remaining)
                continue
            # finally after retries
            data = {'status': 500, 'k_id': kwd['k_id']}
            feed_saver_new_keyword_tweets(channel, data)
            return False

        except Exception as e:
            # push keyword in queue & maintain log
            logger.error(
                f"Exception occurred for keyword {kwd['kwd']}. Exception : {e}"
            )
            retry += 1
            # Change credential & lpush current credential id
            r.lpush(key, credential_id)
            if retry <= retry_limit:
                _sleep += sleep_delay
                logger.info(f"Retrying for keyword {kwd['kwd']}")
                api, credential_id = get_twitter_client(r, key)
                tweets_cursor = Cursor(
                    api.search,
                    q=keyword,
                    count=100,
                    since_id=since_id,
                    max_id=t_id,
                    tweet_mode='extended').pages(page_remaining)
                continue

            data = {'status': 500, 'k_id': kwd['k_id']}
            feed_saver_new_keyword_tweets(channel, data)
            return False
Example #19
import tweepy
from textblob import TextBlob
from twitter_client import get_twitter_client

api = get_twitter_client()

# Retrieve Tweets
public_tweets = api.search('#magicleap')
for tweet in public_tweets:
    print(tweet.text)
    analysis = TextBlob(tweet.text)
    print(analysis.sentiment)
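
TextBlob's sentiment is a namedtuple with polarity (from -1.0 to 1.0) and subjectivity (from 0.0 to 1.0), so the fields can also be read individually:

analysis = TextBlob("I love the new demo")
print(analysis.sentiment.polarity)      # above 0 for positive text
print(analysis.sentiment.subjectivity)  # closer to 1.0 for opinionated text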
Example #20
import json
import math
import os
import sys

from tweepy import Cursor
from twitter_client import get_twitter_client

MAX_FRIENDS = 15000

def usage():
    print("Usage:")
    print("python {} <username>".format(sys.argv[0]))

def paginate(items, n):
    """Generate n-sized chunks from items"""
    for i in range(0, len(items), n):
        yield items[i:i+n]

if __name__ == '__main__':
    screen_name = "richarddeng88" # "jtimberlake"
    client = get_twitter_client()
    dirname = "users/{}".format(screen_name)
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    try:
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except OSError:
        print("Directory {} already exists".format(dirname))
    except Exception as e:
        print("Error while creating directory {}".format(dirname))
        print(e)
        sys.exit(1)

    # get followers for a given user
    fname = "users/{}/followers.jsonl".format(screen_name)
    print(fname)
    with open(fname, 'w') as f:
        for followers in Cursor(client.followers_ids,
                                screen_name=screen_name).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    f.write(json.dumps(user._json) + "\n")
Example #21
def get_info(twitter_user):
    # screen_name = sys.argv[1]
    screen_name = twitter_user
    client = get_twitter_client()
    dirname = "users/{}".format(screen_name)
    max_pages = math.ceil(MAX_FRIENDS / 5000)

    try:
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except OSError:
        print("Directory {} already exists".format(dirname))
    except Exception as e:
        print("Error while creating directory{}".format(dirname))
        print(e)
        sys.exit(1)

    print('Extracting {} \n'.format(screen_name))

    # get followers for a given user
    fjson = "users/{}/followers.jsonl".format(screen_name)
    fcsv = "users/{}/followers.csv".format(screen_name)

    with open(fjson, 'w') as f1, open(fcsv, 'w') as f2:
        writer = csv.writer(f2)
        # write the CSV header once, not once per chunk
        writer.writerow(["id", "screen_name"])
        for followers in Cursor(client.followers_ids,
                                screen_name=screen_name).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)

                # out = [[user.created_at, user.id, user.screen_name, user.name, user.description, user.location] for user in users]
                out = [[user.id, user.screen_name] for user in users]
                writer.writerows(out)

                for user in users:
                    f1.write(json.dumps(user._json) + "\n")

                for user in users:
                    f1.write(json.dumps(user._json) + "\n")
            if len(followers) == 5000:
                print(
                    "Followers: More results available. Sleeping for 60seconds to avoid rate limit"
                )
                time.sleep(60)

    # get friends for a given user
    fjson = "users/{}/friends.jsonl".format(screen_name)
    fcsv = "users/{}/friends.csv".format(screen_name)

    with open(fjson, 'w') as f1, open(fcsv, 'w') as f2:
        for friends in Cursor(client.friends_ids,
                              screen_name=screen_name).pages(max_pages):
            for chunk in paginate(friends, 100):
                users = client.lookup_users(user_ids=chunk)

                # out = [[user.created_at, user.id, user.screen_name,user.name, user.description, user.location] for user in users]
                # writer = csv.writer(f2)
                # writer.writerow(["id","screen_name"])
                # writer.writerows(out)

                for user in users:
                    f1.write(json.dumps(user._json) + "\n")
            if len(friends) == 5000:
                print(
                    "Friends: More results available. Sleeping for 60 seconds to avoid rate limit"
                )
                time.sleep(60)

    # get user's profile
    fname = "users/{}/user_profile.json".format(screen_name)
    with open(fname, 'w') as f:
        profile = client.get_user(screen_name=screen_name)
        f.write(json.dumps(profile._json, indent=4))
Example #22
def update_counter(store, value):
    # tally occurrences of a value, creating the key on first sight
    try:
        store[value] += 1
    except KeyError:
        store[value] = 1
    return


if __name__ == '__main__':

    fname = 'data/for_tweets.txt'
    tmpname = 'data/tmptwt.txt'

    access_file = sys.argv[1]
    screen_name = sys.argv[2]
    period = sys.argv[3]

    if period == 'all':
        client = get_twitter_client(access_file)

        counter = 0
        tweets = []
        dates = Counter()

        current_user = client.get_user(screen_name)

        if not current_user.protected:
            for page in Cursor(client.user_timeline,
                               screen_name=screen_name,
                               count=200).pages(16):
                for status in page:
                    tweets.append(status.created_at.year)
                    counter += 1
        a = Counter(sorted(tweets))
Example #23
def convert_valid(one_char):
    """Convert a character into '_' if "invalid".

    Return: string
    """
    valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
    if one_char in valid_chars:
        return one_char
    else:
        return '_'


def get_tweets(query_fname, auth, max_time, location=None):
    stop = datetime.now() + max_time
    twitter_stream = Stream(auth, CustomListener(query_fname))
    while datetime.now() < stop:
        if location:
            # note: uses a hard-coded Congo bounding box rather than the 'location' argument
            twitter_stream.filter(
                locations=[
                    11.94, -13.64, 30.54, 5.19], is_async=True)
        else:
            twitter_stream.filter(track=query, is_async=True)


if __name__ == '__main__':
    query = sys.argv[1:]  # list of CLI arguments
    query_fname = ' '.join(query)  # string
    auth = get_twitter_auth()
    location = get_location(get_twitter_client(), 'Congo')
    get_tweets(query_fname, auth, timedelta(minutes=30), location=location)
Example #24
0
def find_reply_count(user_name, tweet_ids):
    client = get_twitter_client()
    # collect recent tweets addressed to the user, capped at 1,000 statuses
    searched_tweets = [status for status in
                       Cursor(client.search,
                              q='to:{}'.format(user_name),
                              count=100).items(1000)]
    # count how many of them reply to one of the given tweet ids
    return sum(1 for status in searched_tweets
               if status.in_reply_to_status_id_str in tweet_ids)
Example #25
def tweet_for(topics, locations=None):
    def observe_tweets(observer):
        class TweetListener(StreamListener):
            def on_data(self, data):
                # push each raw tweet into the Rx stream
                observer.on_next(data)
                return True

            def on_error(self, status):
                if status == 420:
                    # rate limited: returning False disconnects the stream
                    return False
                else:
                    sys.stderr.write("Error {}\n".format(status))
                    return True

        tweet_listener = TweetListener()
        auth = get_twitter_auth()

        stream = Stream(auth, tweet_listener)
        stream.filter(track=topics, locations=locations)

    return Observable.create(observe_tweets).share()


topics = [
    'RDC',
    'RDCongo',
    'DRC',
    'DRCongo',
]
coordinates = get_location(get_twitter_client(), 'Congo').get('coordinates')
location_tweets = tweet_for(topics=[], locations=[11.94, -13.64, 30.54, 5.19])
hash_tag_tweets = tweet_for(topics=topics)
place_id_tweet = tweet_for(topics=topics, locations=coordinates)
combine_loc_hash_tag = Observable.merge(location_tweets, hash_tag_tweets)

combine_loc_hash_tag.subscribe()
Example #26
def get_user(screen_name, access_file):

    client = get_twitter_client(access_file)

    account = client.get_user(screen_name)

    max_pages = math.ceil(MAX_FRIENDS / 5000)

    # get data for a given user

    source = []
    target = []
    direction = []
    connections = set()

    users_data = get_user_data(account)

    try:
        users_collection.insert_one(users_data)
    except pymongo.errors.DuplicateKeyError:
        pass

    counter = 0
    if not account.protected:
        for followers in Cursor(client.followers_ids,
                                screen_name=screen_name,
                                count=1500).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    users_data = get_user_data(user)
                    source.append(str(account.id))
                    target.append(str(user.id))
                    direction.append('Directed')
                    connections.add(user.screen_name)
                    try:
                        users_collection.insert_one(users_data)
                    except pymongo.errors.DuplicateKeyError:
                        continue
            counter += len(followers)
        for friends in Cursor(client.friends_ids,
                              screen_name=screen_name,
                              count=1500).pages(max_pages):
            for chunk in paginate(friends, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    users_data = get_user_data(user)
                    source.append(str(user.id))
                    target.append(str(account.id))
                    direction.append('Directed')
                    connections.add(user.screen_name)
                    try:
                        users_collection.insert_one(users_data)
                    except pymongo.errors.DuplicateKeyError:
                        continue
            counter += len(friends)

    my_query = {"_id": str(account.id)}
    connections_data = {"$set": {"Connections": list(connections)}}
    users_collection.update_one(my_query, connections_data)
    graph_data = get_graph_data(account, source, target, direction)
    try:
        graph_collection.insert_one(graph_data)
    except pymongo.errors.DuplicateKeyError:
        pass
Example #27
def create_graph(screen_name, depth):
    # counter used to rotate between credentials files
    safety = 0
    # getting access file
    access_file, safety = get_access_file(safety)
    # creating connection with Twitter API
    client = get_twitter_client(access_file)
    # add the graph if not already in the database
    if not graph_collection.find_one({"_id": screen_name}):
        graph_collection.insert_one({"_id": screen_name, "Depth": depth})
    # list of users to check
    to_do = []
    to_do.append(screen_name)
    # progress counter
    counter = 1
    # set to keep connections
    edges = set()
    # loop over the levels of the graph, up to the requested depth
    for i in range(1, depth + 1):
        # users discovered at this level, queued for the next depth
        temp = []
        # loop over all users
        for name in set(to_do):
            if counter % 50 == 0:
                print(str(counter) + " DONE")
            counter += 1
            account = client.get_user(name)
            # check if friend and followers already available
            try:
                users_collection.find_one({"_id": name})['Friends']
            # if not available, download it
            except (KeyError, TypeError) as e:
                # a TypeError means the user is not in the database at all
                if str(e) == "'NoneType' object is not subscriptable":
                    users_collection.insert_one({"_id": name})
                    # check friends and followers of given user
                    try:
                        followers, friends = get_user_connections(
                            account, client)
                        for follower in followers:
                            record = follower + "," + name + ",Directed"
                            # save connection
                            edges.add(record)
                            my_query = {"_id": screen_name}
                            data = {"$addToSet": {"Edges": record}}
                            graph_collection.update_one(my_query, data)
                        for friend in friends:
                            record = name + "," + friend + ",Directed"
                            # save connection
                            edges.add(record)
                            my_query = {"_id": screen_name}
                            data = {"$addToSet": {"Edges": record}}
                            graph_collection.update_one(my_query, data)
                        # add users to the queue for the next depth
                        temp += followers
                        temp += friends
                    except tweepy.TweepError as e:
                        # if Twitter API limit is reached, change access file
                        if e.response.text == '{"errors":[{"message":"Rate limit exceeded","code":88}]}':
                            print('CHANGING ACCESS FILE TO ' + str(safety) +
                                  '!')
                            access_file, safety = get_access_file(safety)
                            # try to check friends and followers of given user again
                            # with new access file
                            try:
                                client = get_twitter_client(access_file)
                                account = client.get_user(name)
                                followers, friends = get_user_connections(
                                    account, client)
                                # check friends and followers of given user
                                for follower in followers:
                                    record = follower + "," + name + ",Directed"
                                    # save connection
                                    edges.add(record)
                                    my_query = {"_id": screen_name}
                                    data = {"$addToSet": {"Edges": record}}
                                    graph_collection.update_one(my_query, data)
                                for friend in friends:
                                    record = name + "," + friend + ",Directed"
                                    # save connection
                                    edges.add(record)
                                    my_query = {"_id": screen_name}
                                    data = {"$addToSet": {"Edges": record}}
                                    graph_collection.update_one(my_query, data)
                                # add users to the queue for the next depth
                                temp += followers
                                temp += friends
                            except tweepy.TweepError:
                                pass
                        # otherwise, probably deleted user was found
                        else:
                            pass
                # a KeyError means the user document exists but lacks connections
                elif str(e) == "'Friends'":
                    # check friends and followers of given user
                    try:
                        followers, friends = get_user_connections(
                            account, client)
                        for follower in followers:
                            record = follower + "," + name + ",Directed"
                            # save connection
                            edges.add(record)
                            my_query = {"_id": screen_name}
                            data = {"$addToSet": {"Edges": record}}
                            graph_collection.update_one(my_query, data)
                        for friend in friends:
                            record = name + "," + friend + ",Directed"
                            # save connection
                            edges.add(record)
                            my_query = {"_id": screen_name}
                            data = {"$addToSet": {"Edges": record}}
                            graph_collection.update_one(my_query, data)
                        # add users to the queue for the next depth
                        temp += followers
                        temp += friends
                    except tweepy.TweepError as e:
                        # if Twitter API limit is reached, change access file
                        if e.response.text == '{"errors":[{"message":"Rate limit exceeded","code":88}]}':
                            print('CHANGING ACCESS FILE TO ' + str(safety) +
                                  '!')
                            access_file, safety = get_access_file(safety)
                            # try to check friends and followers of given user again
                            # with new access file
                            try:
                                client = get_twitter_client(access_file)
                                account = client.get_user(name)
                                followers, friends = get_user_connections(
                                    account, client)
                                for follower in followers:
                                    record = follower + "," + name + ",Directed"
                                    # save connection
                                    edges.add(record)
                                    my_query = {"_id": screen_name}
                                    data = {"$addToSet": {"Edges": record}}
                                    graph_collection.update_one(my_query, data)
                                for friend in friends:
                                    record = name + "," + friend + ",Directed"
                                    # save connection
                                    edges.add(record)
                                    my_query = {"_id": screen_name}
                                    data = {"$addToSet": {"Edges": record}}
                                    graph_collection.update_one(my_query, data)
                                temp += followers
                                temp += friends
                            except tweepy.TweepError:
                                pass
                        # otherwise, probably deleted user was found
                        else:
                            pass
        if i == 1:
            to_do.remove(screen_name)
        to_do += temp

        # save connections to file
        filename = 'connections/{}.csv'.format(screen_name)
        with open(filename, 'w') as f:
            for edge in list(edges):
                f.write(edge + '\n')
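
Example #27 records edges with the same block four times (followers and friends, before and after a credentials change). A minimal sketch of how that block could collapse into one helper; the helper name is hypothetical, while edges and graph_collection are the objects used above:

def record_edges(screen_name, name, followers, friends, edges):
    # record follower -> user and user -> friend edges, both directed
    for follower in followers:
        edges.add(follower + "," + name + ",Directed")
    for friend in friends:
        edges.add(name + "," + friend + ",Directed")
    # persist all edges on the graph document in a single update
    graph_collection.update_one(
        {"_id": screen_name},
        {"$addToSet": {"Edges": {"$each": sorted(edges)}}})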