コード例 #1
0
ファイル: utils.py プロジェクト: ColinWan/DataFest2020
def pull_tweet(input_file_name, max_tweets=10):
    """Fetch the text of English tweets whose IDs are listed in a file.

    Each line of ``input_file_name`` is stripped and passed to
    ``Twarc.tweet``. For English tweets (``lang == "en"``) the ``full_text``
    is collected with newlines replaced by spaces; for retweets the text of
    the original (``retweeted_status``) tweet is used instead.

    Args:
        input_file_name: Path to a text file with one tweet ID per line.
        max_tweets: Stop after this many tweets have been collected
            (defaults to 10, the previously hard-coded limit).

    Returns:
        A list of tweet texts, at most ``max_tweets`` long.
    """
    # NOTE(review): API credentials are committed in source. They should be
    # rotated and loaded from configuration/environment instead.
    CONSUMER_KEY = "9At2u3Y2DraTHLSg3D9w6LhE9"
    CONSUMER_KEY_SECRET = "DRFCbI2t0gMhfV2KnEub6cljowW9zRwmkeMJ0GT9MlMkrkzspM"
    ACCESS_TOKEN = "1259913765614751745-LwtSI48si3sYekzvxW86syIFsRgirl"
    ACCESS_TOKEN_SECRET = "e0gpJdT0IXOSxFrhplKMl8FlP0dVnuLg1vwBHzt5Fc9J9"

    t = Twarc(CONSUMER_KEY, CONSUMER_KEY_SECRET, ACCESS_TOKEN,
              ACCESS_TOKEN_SECRET)
    data = []
    # The context manager guarantees the ID file is closed on every path.
    with open(input_file_name, "r") as input_file:
        for line in input_file:
            if len(data) >= max_tweets:
                break
            try:
                tweet = t.tweet(line.strip())
                if tweet["lang"] != "en":
                    continue
                # For a retweet, take the text of the original tweet.
                if 'retweeted_status' in tweet:
                    text = tweet['retweeted_status']['full_text']
                else:
                    # Previously `data.append(data, ...)`, which raised a
                    # TypeError that was swallowed below, silently dropping
                    # every non-retweet.
                    text = tweet['full_text']
                data.append(text.replace('\n', ' '))
            except Exception:
                # Best effort: skip IDs that cannot be fetched or parsed
                # (e.g. the tweet no longer exists).
                continue
    return data
コード例 #2
0
ファイル: dataobtainer.py プロジェクト: ColinWan/DataFest2020
def readIdFile(input_file_name, output_file_name):
    """Hydrate tweet IDs from a file and write English tweets as JSON lines.

    Each line of ``input_file_name`` is stripped and passed to
    ``Twarc.tweet``. Every tweet whose ``lang`` is ``"en"`` is written to
    ``output_file_name`` as one JSON document per line, and a running count
    of written tweets is printed.

    Args:
        input_file_name: Path to a text file with one tweet ID per line.
        output_file_name: Path of the file to (over)write with JSON lines.

    Returns:
        1 if any exception occurred (the exception is printed and processing
        stops at that point); otherwise None.
    """
    try:
        t = Twarc(CONSUMER_KEY, CONSUMER_KEY_SECRET, ACCESS_TOKEN,
                  ACCESS_TOKEN_SECRET)
        # Context managers ensure both files are flushed and closed, even
        # when an exception aborts the loop part-way through.
        with open(output_file_name, "w") as output_file, \
                open(input_file_name, "r") as input_file:
            written = 0
            for line in input_file:
                tweet = t.tweet(line.strip())
                if tweet["lang"] == "en":
                    output_file.write(json.dumps(tweet) + "\n")
                    written += 1
                    print(written)
    except Exception as e:
        print(e)
        return 1
コード例 #3
0
def pull_tweet(input_file_name, max_tweets=10):
    """Fetch the text of English tweets whose IDs are listed in a file.

    Each line of the input file is stripped and passed to ``Twarc.tweet``.
    For English tweets (``lang == "en"``) the ``full_text`` is collected with
    newlines replaced by spaces; for retweets the text of the original
    (``retweeted_status``) tweet is used instead. A progress message is
    printed after every 10 collected tweets.

    Args:
        input_file_name: A txt file containing tweet IDs, one per line.
        max_tweets: Stop after this many tweets have been collected
            (defaults to 10, the previously hard-coded limit).

    Returns:
        A list with the corresponding tweet texts, at most ``max_tweets``
        long.
    """

    # Required keys
    # NOTE(review): API credentials are committed in source. They should be
    # rotated and loaded from configuration/environment instead.
    CONSUMER_KEY = "9At2u3Y2DraTHLSg3D9w6LhE9"
    CONSUMER_KEY_SECRET = "DRFCbI2t0gMhfV2KnEub6cljowW9zRwmkeMJ0GT9MlMkrkzspM"
    ACCESS_TOKEN = "1259913765614751745-LwtSI48si3sYekzvxW86syIFsRgirl"
    ACCESS_TOKEN_SECRET = "e0gpJdT0IXOSxFrhplKMl8FlP0dVnuLg1vwBHzt5Fc9J9"

    # Initializing twarc module
    t = Twarc(CONSUMER_KEY, CONSUMER_KEY_SECRET, ACCESS_TOKEN,
              ACCESS_TOKEN_SECRET)
    data = []

    # Read each line of the input file; the context manager guarantees the
    # file is closed on every path.
    with open(input_file_name, "r") as input_file:
        for line in input_file:
            if len(data) >= max_tweets:
                break
            try:
                tweet = t.tweet(line.strip())
                if tweet["lang"] != "en":
                    continue
                # For a retweet, take the text of the original tweet.
                if 'retweeted_status' in tweet:
                    text = tweet['retweeted_status']['full_text']
                else:
                    # Previously `data.append(data, ...)`, which raised a
                    # TypeError that was swallowed below, silently dropping
                    # every non-retweet.
                    text = tweet['full_text']
                data.append(text.replace('\n', ' '))
                if len(data) % 10 == 0:
                    print('Pulled {} tweets'.format(len(data)))

            # Skip line if the tweet no longer exists or cannot be parsed
            except Exception:
                continue
    return data
コード例 #4
0
def f(file):
    """Collect replies for the tweets listed in one CSV and score them.

    NOTE: this is Python 2 code (``print`` statements).

    Reads ``c_p.input_csv_path + file``, and for every row fetches the tweet
    identified by its ``permalink`` column. For tweets dated within the last
    7 days, the text of each reply is gathered. The input data, with a new
    ``reply`` column holding the replies joined by ``'==<>=='``, is written to
    ``<input dir>/reply/<file minus last 4 chars>_comments.csv``. All replies
    are also scored with ``analyzer.polarity_scores`` and handed to
    ``classification_tweet``.

    Args:
        file: File name of the CSV, relative to ``c_p.input_csv_path``. The
            CSV is read for its ``text``, ``permalink`` and ``date`` columns.

    Returns:
        None. The result of ``classification_tweet`` is assigned but not
        returned.
    """
    print file
    today = datetime.date.today()
    # Only tweets from the last 7 days have their replies fetched.
    margin = datetime.timedelta(days=7)
    analyzer = SentimentIntensityAnalyzer()
    # Credential objects (defined outside this function) that are rotated
    # through below; each exposes consumer_key / consumer_secret /
    # access_token / access_token_secret.
    acess_changer = [c8, c9, c10, c7, c1, c2, c3, c4]

    # base_dir='/home/stealthuser/Perosnal/Sentimental/12 august/Data/Hamid output csvs/'
    base_dir = c_p.input_csv_path
    # Make sure the 'reply/' output directory exists under base_dir.
    if os.path.exists(base_dir + 'reply/'):
        output_dir = base_dir + 'reply/'
        pass
    else:
        os.mkdir(base_dir + 'reply/')
        output_dir = base_dir + 'reply/'

    data = pd.read_csv(base_dir + file,
                       low_memory=False,
                       error_bad_lines=False)
    data['reply'] = ''
    # Flat list of every reply text across all tweets (used for scoring).
    reply = []
    acess_changer_counter = 0
    # Number of tweets processed with one credential set before rotating.
    max_tweet = 35
    config_key = 0
    # NOTE(review): `tweet` (the 'text' value) is not used in the loop body;
    # only `index` is.
    for index, tweet in enumerate(data['text']):
        print file + " " + str(index)
        # Tweet id is the last path segment of the permalink.
        t_id = data.ix[index, 'permalink'].split('/')[-1:][0]
        # Keep only the first whitespace-separated token of the date column
        # (parsed below as 'YYYY-MM-DD').
        tweet_date = data.ix[index, 'date'].split()[0]

        # Every `max_tweet` iterations (including the first), switch to the
        # next credential set, wrapping around, and build a new client.
        if (acess_changer_counter % max_tweet == 0):
            access_point = acess_changer[config_key % len(acess_changer)]
            config_key += 1
            t = Twarc(access_point.consumer_key, access_point.consumer_secret,
                      access_point.access_token,
                      access_point.access_token_secret)

        acess_changer_counter += 1
        print access_point
        tweet_r = t.tweet(t_id)
        if (len(tweet_r) > 0):
            # Replies belonging to this tweet only.
            reply_tweets = []
            # (year, month, day) as ints.
            req_format_date = tuple(
                map(lambda x: int(x), tweet_date.split('-')))
            # Fetch replies only if the tweet is at most `margin` old.
            if (today - margin <=
                    datetime.date(req_format_date[0], req_format_date[1],
                                  req_format_date[2])):
                for reply_tweet in t.replies(tweet_r):
                    reply.append(reply_tweet['text'].encode('utf-8'))
                    # print reply_tweet['text'].encode('utf-8')
                    reply_tweets.append(reply_tweet['text'].encode('utf-8'))
            # delimiter for replies is '==<>=='

            data.ix[index, 'reply'] = '==<>=='.join(reply_tweets)
    df_reply_comment = pd.DataFrame({'replies': reply})

    # Score every collected reply and store the four polarity components.
    # NOTE(review): the loop variable `reply` rebinds the list of the same
    # name defined above; the list is not used again after this point.
    for index_comment, reply in enumerate(df_reply_comment['replies']):
        score = analyzer.polarity_scores(str(reply))
        df_reply_comment.ix[index_comment, 'positive'] = score['pos']
        df_reply_comment.ix[index_comment, 'negative'] = score['neg']
        df_reply_comment.ix[index_comment, 'neutral'] = score['neu']
        df_reply_comment.ix[index_comment, 'compound'] = score['compound']

    # df_reply_comment.to_csv(output_dir+file[:-4]+'_replies_sentiment.csv')
    # file[:-4] drops the last four characters of the name (presumably the
    # '.csv' extension; confirm against callers).
    data.to_csv(output_dir + file[:-4] + '_comments.csv')
    status, paths_reply_label = classification_tweet(df_reply_comment,
                                                     output_dir, file[:-4])
コード例 #5
0
# Check that tweet_id was provided
# (exactly one command-line argument besides the script name).
if len(sys.argv) != 2:
    raise RuntimeError(
        "Program should be called like: `python main.py <tweet_id>`")

tweet_id = sys.argv[1]

# Main
# Build the Twitter client; the four credential names are defined outside
# this part of the script.
t = Twarc(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token=access_token,
    access_token_secret=access_token_secret,
)

# Look up the root tweet; a falsy result is treated as "does not exist".
tweet = t.tweet(tweet_id)
if not tweet:
    raise RuntimeError(f"tweet with id {tweet_id} does not exist")
# replies is a generator object
# NOTE(review): the second positional argument is presumably twarc's
# `recursive` flag (also fetch replies to replies); confirm against the
# installed twarc version.
replies = t.replies(tweet, True)

# List to hold dict of relevant photo data from each of the replies
photo_data = []
for reply in replies:
    # Photos will be in a list stored at reply['extended_entities']['media']
    print("Processing next reply")

    ee = reply.get("extended_entities")
    if ee is None:
        continue