コード例 #1
0
 def collect_replies(self):
     """
     Gather up to 30 replies for every tweet in ``self.tweets`` via twarc.

     A Twarc client is created from ``self.ak``, ``self.aks``, ``self.at``
     and ``self.ats``. Each tweet is converted with ``self.tweepy_to_twarc``
     and passed to ``replies(..., recursive=False)``. The collected replies
     are stored in ``self.dict`` under the tweet's id, and the grand total
     is printed once all tweets have been processed.
     :return:
     """
     client = Twarc(self.ak, self.aks, self.at, self.ats)
     total = 0
     for parent in self.tweets:
         collected = []
         reply_iter = client.replies(self.tweepy_to_twarc(parent),
                                     recursive=False)
         # The first item produced is the parent tweet itself; discard it.
         next(reply_iter)
         # Pull replies one at a time until the cap of 30 is reached or the
         # iterator runs dry.
         while len(collected) < 30:
             try:
                 collected.append(next(reply_iter))
             except StopIteration:
                 break
             except Exception as err:
                 # Best effort: report the problem and try the next item.
                 print('error: ', err)
         self.dict[parent.id] = collected  # {tweet id: list of replies}
         total += len(collected)
     print(total, ' replies were collected')
コード例 #2
0
    # Append the timeline of `name` (retweets excluded) to a per-user
    # JSON-lines file, and for tweets created on or after `time_range` also
    # append their replies. Every tweet/reply occupies one line.
    print(name)
    file_name = r"C:\\Users\\ravik\\OneDrive\\Desktop\\UsertimelineReplies\\" + str(name) + ".json"
    max_poi_tweet = 0  # number of timeline tweets written so far
    with open(file_name, "a", encoding='utf-8') as file:
        for tweet in t.timeline(screen_name=name):
            if 'retweeted_status' in tweet:
                print("Its a retweet")
                continue
            if max_poi_tweet > 3000:
                break
            json.dump(tweet, file, ensure_ascii=False)
            file.write("\n")
            max_poi_tweet += 1
            max_replies = 0  # replies written for the current tweet

            created_on = datetime.strptime(
                tweet['created_at'], "%a %b %d %H:%M:%S %z %Y").date()
            if created_on < time_range:
                # Too old: the tweet itself is kept, its replies are not fetched.
                print("{} tweet {} Date didnt satisfy".format(name, max_poi_tweet))
                continue

            # NOTE(review): twarc's replies() may yield the parent tweet as its
            # first item — confirm; if so the parent is written a second time.
            for reply in t.replies(tweet):
                # Check the reply, not the parent: retweeted parents were
                # already skipped above, so testing `tweet` here never fired.
                if 'retweeted_status' in reply:
                    print("Its a retweet")
                    continue
                json.dump(reply, file, ensure_ascii=False)
                file.write("\n")
                max_replies += 1
                print("{} tweet {} reply number {}".format(name, max_poi_tweet, max_replies))
                if max_replies > 21:
                    break
#time.sleep(10)
コード例 #3
0
def f(file):
    """Collect replies for the tweets listed in one CSV and score them.

    The CSV is read from ``c_p.input_csv_path + file``. For each row the
    tweet id is taken from the last ``/``-separated segment of ``permalink``
    and the tweet is fetched through Twarc. If the row's ``date`` is within
    the last seven days, the texts yielded by ``replies()`` are joined with
    the ``'==<>=='`` delimiter and stored in a new ``reply`` column.

    The annotated frame is written to ``<input dir>reply/<name>_comments.csv``.
    All collected reply texts, together with the ``pos``/``neg``/``neu``/
    ``compound`` scores from ``SentimentIntensityAnalyzer``, are passed to
    ``classification_tweet``.

    :param file: CSV file name relative to ``c_p.input_csv_path``; its last
        four characters are stripped to build the output file names.
    """
    print(file)
    today = datetime.date.today()
    margin = datetime.timedelta(days=7)
    oldest_allowed = today - margin
    analyzer = SentimentIntensityAnalyzer()
    # Credential objects that are cycled through while fetching.
    credentials = [c8, c9, c10, c7, c1, c2, c3, c4]
    tweets_per_credential = 35

    base_dir = c_p.input_csv_path
    output_dir = base_dir + 'reply/'
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    data = pd.read_csv(base_dir + file,
                       low_memory=False,
                       error_bad_lines=False)
    data['reply'] = ''
    all_replies = []
    for index, _text in enumerate(data['text']):
        print(file + " " + str(index))
        # read_csv produced a default 0..n-1 index, so the label-based .loc
        # addresses the same cell the removed .ix accessor did.
        t_id = data.loc[index, 'permalink'].split('/')[-1]
        tweet_date = data.loc[index, 'date'].split()[0]

        # Switch to the next credential set every `tweets_per_credential`
        # rows, wrapping around at the end of the pool. Row 0 always takes
        # this branch, so `t` and `access_point` are bound before first use.
        if index % tweets_per_credential == 0:
            slot = (index // tweets_per_credential) % len(credentials)
            access_point = credentials[slot]
            t = Twarc(access_point.consumer_key, access_point.consumer_secret,
                      access_point.access_token,
                      access_point.access_token_secret)

        print(access_point)
        tweet_r = t.tweet(t_id)
        if not tweet_r:
            # Nothing came back for this id; the row keeps its empty 'reply'.
            continue

        reply_tweets = []
        date_parts = [int(part) for part in tweet_date.split('-')]
        tweet_day = datetime.date(date_parts[0], date_parts[1], date_parts[2])
        if oldest_allowed <= tweet_day:
            for reply_tweet in t.replies(tweet_r):
                # NOTE(review): Python 2 idiom. Under Python 3 encode()
                # yields bytes and the str.join below would raise TypeError.
                text = reply_tweet['text'].encode('utf-8')
                all_replies.append(text)
                reply_tweets.append(text)

        # delimiter for replies is '==<>=='
        data.loc[index, 'reply'] = '==<>=='.join(reply_tweets)

    df_reply_comment = pd.DataFrame({'replies': all_replies})

    for index_comment, reply_text in enumerate(df_reply_comment['replies']):
        score = analyzer.polarity_scores(str(reply_text))
        df_reply_comment.loc[index_comment, 'positive'] = score['pos']
        df_reply_comment.loc[index_comment, 'negative'] = score['neg']
        df_reply_comment.loc[index_comment, 'neutral'] = score['neu']
        df_reply_comment.loc[index_comment, 'compound'] = score['compound']

    data.to_csv(output_dir + file[:-4] + '_comments.csv')
    status, paths_reply_label = classification_tweet(df_reply_comment,
                                                     output_dir, file[:-4])
コード例 #4
0
# The id of the tweet to inspect is the first command-line argument.
tweet_id = sys.argv[1]

# Main
# Build the Twarc client from the four credential values, which must already
# be defined at this point (their definitions are not visible here).
t = Twarc(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token=access_token,
    access_token_secret=access_token_secret,
)

# Look the tweet up; a falsy result is treated as "tweet does not exist".
tweet = t.tweet(tweet_id)
if not tweet:
    raise RuntimeError(f"tweet with id {tweet_id} does not exist")
# replies is a generator object
# NOTE(review): the second positional argument is presumably twarc's
# `recursive` flag — confirm against the installed twarc signature.
replies = t.replies(tweet, True)

# List to hold dict of relevant photo data from each of the replies
photo_data = []
for reply in replies:
    # Photos will be in a list stored at reply['extended_entities']['media']
    print("Processing next reply")

    # Skip replies that have no "extended_entities" entry.
    ee = reply.get("extended_entities")
    if ee is None:
        continue

    # Skip replies whose extended entities have no "media" entry.
    m = ee.get("media")
    if m is None:
        continue