import json

from twarc import Twarc


def readIdFile(input_file_name, output_file_name):
    """Hydrate tweet IDs from a text file and write English tweets as JSON lines."""
    try:
        # CONSUMER_KEY, CONSUMER_KEY_SECRET, ACCESS_TOKEN and
        # ACCESS_TOKEN_SECRET must be defined at module level
        # before this function is called.
        t = Twarc(CONSUMER_KEY, CONSUMER_KEY_SECRET,
                  ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        with open(output_file_name, "w") as output_file, \
                open(input_file_name, "r") as inputF:
            i = 1
            for line in inputF:
                tweet = t.tweet(line.strip())
                if tweet["lang"] == "en":
                    output_file.write(json.dumps(tweet) + "\n")
                    print(i)
                    i += 1
    except Exception as e:
        print(e)
        return 1
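# A minimal, hypothetical usage sketch for readIdFile; the file names here are
# assumptions for illustration, and the credential constants must be defined
# at module level as noted above.
if __name__ == "__main__":
    readIdFile("tweet_ids.txt", "hydrated_tweets.jsonl")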
def pull_tweet(input_file_name): """ Args: input_file_name: A txt file containing tweet IDs Returns: the corresponding tweet text """ # Required Keys CONSUMER_KEY = "9At2u3Y2DraTHLSg3D9w6LhE9" CONSUMER_KEY_SECRET = "DRFCbI2t0gMhfV2KnEub6cljowW9zRwmkeMJ0GT9MlMkrkzspM" ACCESS_TOKEN = "1259913765614751745-LwtSI48si3sYekzvxW86syIFsRgirl" ACCESS_TOKEN_SECRET = "e0gpJdT0IXOSxFrhplKMl8FlP0dVnuLg1vwBHzt5Fc9J9" # Initializing twarc module t = Twarc(CONSUMER_KEY, CONSUMER_KEY_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET) inputF = open(input_file_name, "r") line = inputF.readline() data = [] i = 0 # Read each line of the input file while line != "" and i < 10: try: tweet = t.tweet(line.strip()) if tweet["lang"] == "en": if 'retweeted_status' in tweet.keys(): data.append(tweet['retweeted_status']['full_text'].replace( '\n', ' ')) else: data.append(data, tweet['full_text'].replace('\n', ' ')) i += 1 if i % 10 == 0: print('Pulled {} tweets'.format(i)) line = inputF.readline() # Skip line if the tweet no longer exists except Exception as e: line = inputF.readline() return data
import datetime
import os

import pandas as pd
from twarc import Twarc
# SentimentIntensityAnalyzer is assumed to be VADER's analyzer; it could
# also come from nltk.sentiment.vader in the original project.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# c1..c10 (credential objects), c_p (path config) and classification_tweet
# are project-local and must be imported from the surrounding codebase.


def f(file):
    print(file)
    today = datetime.date.today()
    margin = datetime.timedelta(days=7)
    analyzer = SentimentIntensityAnalyzer()
    # Rotate through several sets of API credentials to spread out
    # the rate limit
    access_changer = [c8, c9, c10, c7, c1, c2, c3, c4]
    base_dir = c_p.input_csv_path
    output_dir = base_dir + 'reply/'
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # Note: newer pandas replaces error_bad_lines=False with
    # on_bad_lines='skip'
    data = pd.read_csv(base_dir + file, low_memory=False,
                       error_bad_lines=False)
    data['reply'] = ''
    reply = []
    access_changer_counter = 0
    max_tweet = 35
    config_key = 0
    for index, tweet in enumerate(data['text']):
        print(file + " " + str(index))
        t_id = data.at[index, 'permalink'].split('/')[-1]
        tweet_date = data.at[index, 'date'].split()[0]
        # Switch credentials every max_tweet tweets
        if access_changer_counter % max_tweet == 0:
            access_point = access_changer[config_key % len(access_changer)]
            config_key += 1
            t = Twarc(access_point.consumer_key,
                      access_point.consumer_secret,
                      access_point.access_token,
                      access_point.access_token_secret)
        access_changer_counter += 1
        print(access_point)
        tweet_r = t.tweet(t_id)
        if len(tweet_r) > 0:
            reply_tweets = []
            year, month, day = map(int, tweet_date.split('-'))
            # Only fetch replies for tweets from the last 7 days
            if today - margin <= datetime.date(year, month, day):
                for reply_tweet in t.replies(tweet_r):
                    reply.append(reply_tweet['text'])
                    reply_tweets.append(reply_tweet['text'])
                # Delimiter for replies is '==<>=='
                data.at[index, 'reply'] = '==<>=='.join(reply_tweets)
    # Score every collected reply with VADER
    df_reply_comment = pd.DataFrame({'replies': reply})
    for index_comment, reply_text in enumerate(df_reply_comment['replies']):
        score = analyzer.polarity_scores(str(reply_text))
        df_reply_comment.at[index_comment, 'positive'] = score['pos']
        df_reply_comment.at[index_comment, 'negative'] = score['neg']
        df_reply_comment.at[index_comment, 'neutral'] = score['neu']
        df_reply_comment.at[index_comment, 'compound'] = score['compound']
    data.to_csv(output_dir + file[:-4] + '_comments.csv')
    status, paths_reply_label = classification_tweet(
        df_reply_comment, output_dir, file[:-4])
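# The function above switches credentials every max_tweet requests to spread
# work across several API keys and stay under per-key rate limits. A minimal
# standalone sketch of that round-robin pattern; the generator below is an
# illustration, not part of the original code.
import itertools


def rotate_credentials(credentials, max_per_key=35):
    """Yield one credential per request, moving to the next key every
    max_per_key calls."""
    cycle = itertools.cycle(credentials)
    current = next(cycle)
    for counter in itertools.count(1):
        yield current
        if counter % max_per_key == 0:
            current = next(cycle)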
import sys

from twarc import Twarc

# consumer_key, consumer_secret, access_token and access_token_secret are
# assumed to be defined above (e.g. loaded from a config file).

# Check that tweet_id was provided
if len(sys.argv) != 2:
    raise RuntimeError(
        "Program should be called like: `python main.py <tweet_id>`")
tweet_id = sys.argv[1]

# Main
t = Twarc(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token=access_token,
    access_token_secret=access_token_secret,
)

tweet = t.tweet(tweet_id)
if not tweet:
    raise RuntimeError(f"tweet with id {tweet_id} does not exist")

# replies is a generator object; the second argument makes twarc recurse
# into replies of replies
replies = t.replies(tweet, True)

# List to hold dict of relevant photo data from each of the replies
photo_data = []
for reply in replies:
    # Photos will be in a list stored at reply['extended_entities']['media']
    print("Processing next reply")
    ee = reply.get("extended_entities")
    if ee is None:
        continue
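    # What follows is not in the original (the snippet is truncated here);
    # this is a hedged sketch of a likely next step, collecting photo URLs
    # from the v1.1 media entities, each of which carries a 'type' and a
    # 'media_url_https' field:
    for media in ee.get("media", []):
        if media.get("type") == "photo":
            photo_data.append({
                "tweet_id": reply["id_str"],
                "url": media["media_url_https"],
            })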