Example No. 1
    def create_search_payload(self):
        if self.do_sandbox:
            self.rule = gen_rule_payload(self.raw_rule, results_per_call=100,
                                         from_date=self.from_date, to_date=self.to_date)

        else:
            self.rule = gen_rule_payload(self.raw_rule, results_per_call=500,
                                         from_date=self.from_date, to_date=self.to_date)
Example No. 2
def get_file(aname,
             cak,
             cask,
             etype,
             hashtag,
             keywords,
             fdate='00-00-0000',
             tdate='00-00-0000',
             ftime='00:00',
             ttime='00:00'):

    if etype == 'efa':  # Full archive scraping (refer to limits on README)
        endp = 'https://api.twitter.com/1.1/tweets/search/fullarchive/' + aname + '.json'
    elif etype == 'tdays':  # 30 days scraping (refer to limits on README)
        endp = 'https://api.twitter.com/1.1/tweets/search/30day/' + aname + '.json'
    else:
        # Fail fast on an unknown endpoint type instead of building a bogus config.
        raise ValueError("etype must be 'efa' (full archive) or 'tdays' (30-day)")

    # Creating a yaml credentials file
    config = dict(search_tweets_api=dict(account_type='premium',
                                         endpoint=endp,
                                         consumer_key=cak,
                                         consumer_secret=cask))

    with open('C:\\Users\\Samuktha\\Documents\\USC\\twitter\\proj\\cred.yaml',
              'w') as config_file:
        yaml.dump(config, config_file, default_flow_style=False)

    # loading credentials
    premium_search_args = load_credentials(
        'C:\\Users\\Samuktha\\Documents\\USC\\twitter\\proj\\cred.yaml',
        yaml_key='search_tweets_api',
        env_overwrite=True)
    print(premium_search_args)

    if etype == 'efa':
        rule = gen_rule_payload(
            results_per_call=100,
            from_date=fdate + ' ' + ftime,  #"2019-07-06 01:00",
            to_date=tdate + ' ' + ttime,  #"2019-07-06 02:15",
            pt_rule=keywords,
        )
    else:
        rule = gen_rule_payload(results_per_call=100, pt_rule=keywords)

    # result stream

    rs = ResultStream(rule_payload=rule, max_results=50, **premium_search_args)

    return rs
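For orientation, a minimal way to consume the ResultStream returned by get_file might look like the sketch below. This is a hypothetical usage example: the account name, keys, query, and dates are placeholders, and the credentials file path is the hardcoded one used inside get_file.

# Hypothetical driver for get_file(); all values shown are placeholders.
rs = get_file(aname='my_dev_env',
              cak='CONSUMER_KEY',
              cask='CONSUMER_SECRET',
              etype='efa',
              hashtag='#flood',
              keywords='flood lang:en',
              fdate='2019-07-06', tdate='2019-07-07',
              ftime='01:00', ttime='02:15')
for tweet in rs.stream():  # ResultStream.stream() pages through results lazily
    print(tweet['created_at'])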
Example No. 3
def searchtweets_query(file_name='../../Twitter_cred/full_arch_2007-2020.txt'):

    premium_search_args = load_credentials(
        filename="~/.twitter_keys.yaml",
        yaml_key="search_tweets_fullarchive_dev",
        env_overwrite=False)

    print(premium_search_args)
    print()

    #query = "(nat OR natte OR water OR wateroverlast OR regen OR storm OR blank OR bui OR overstroming OR hoosbui OR schade OR noodweer OR wolkbreuk OR waterschade) has:geo place_country:NL"
    query = "(wateroverlast OR overstroming OR waterschade) has:geo place_country:NL"

    do_query = False

    if do_query:
        from_date = "2007-01-01"
        to_date = "2020-01-01"
        rule = gen_rule_payload(query,
                                results_per_call=500,
                                from_date=from_date,
                                to_date=to_date)
        tweets = collect_results(rule,
                                 max_results=500 * 50,
                                 result_stream_args=premium_search_args
                                 )  # change this if you need to
        for tweet in tweets:
            with open(file_name, 'a') as fp:
                fp.write(json.dumps(tweet) + '\n')
    else:
        print(
            "No query was done. To perform a Twitter query, set do_query to True in Twitter/searchtweets_query.py"
        )
Example No. 4
def get_tweets(query_set, twitter_args, query_filter=None):
    tweets_list = list()
    params = c.TWITTER_PARAMS
    for query in query_set:
        curr_month = "{}-{}".format(dt.now().year,
                                    format_date_str(dt.now().month))
        _, curr_usage = get_twitter_api_usage(curr_month)
        if curr_usage >= 24999:
            print("Twitter API limit is about to exceed! Returning now ...\n")
            break
        if query_filter:
            q = '("{}") {}'.format(query, query_filter)
        else:
            q = "{}".format(query)
            print("No filter/Filter in query_set: {}".format(q))
        print("Collecting for {}".format(q))
        try:
            rule = gen_rule_payload(
                q, results_per_call=params["RESULTS_PER_CALL"])
            tweets = collect_results(rule,
                                     max_results=params["MAX_RESULTS"],
                                     result_stream_args=twitter_args)
            print("number of tweets: {}".format(len(tweets)))
            update_twitter_api_usage(curr_month, len(tweets))
            tweets_list.append(tweets)

        except Exception as e:
            print("Exception occurred while fetching tweets: {}".format(e))
            break
    return tweets_list
Example No. 5
    def premium_set_search_params(self,
                                  search_query,
                                  from_date,
                                  to_date,
                                  no_retweets=True,
                                  results_per_call=500):
        """
        Sets the Search Query and maximum Tweets
        to be retrieved to save Quota
        """

        # Set a static Language Filter for English Tweets
        lang_filter = ' lang:en'
        if no_retweets:
            rt_filter = ' -is:retweet'
            # Adds an ignore Retweets tag to the (Altcoin) Query
            self.query = search_query + lang_filter + rt_filter
        else:
            # This Query includes all Tweets, also Retweets
            self.query = search_query + lang_filter
        # Sets the Rule for the Query to be executed (time frame & # of Results)
        self.rule = gen_rule_payload(self.query,
                                     results_per_call=results_per_call,
                                     from_date=from_date,
                                     to_date=to_date)
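A hypothetical call of premium_set_search_params, assuming an instance of the surrounding class is available (here called client; the query and date window are placeholders):

# Hypothetical usage; 'client' stands for an instance of the class defining this method.
client.premium_set_search_params(search_query='$BTC OR Bitcoin',
                                 from_date='2021-01-01',
                                 to_date='2021-01-31',
                                 no_retweets=True,
                                 results_per_call=500)
# client.query now holds the final query string and client.rule the generated rule payload.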
Example No. 6
def counts(queries, nameList):
    # premium_search_args = load_credentials(filename="twitter_keys.yaml", yaml_key="search_tweets_api", env_overwrite=False)
    # queries = ['"$LTC" OR "Litecoin"','"$ETH" OR "Ethereum"','"$BTC" OR "Bitcoin"', 'Holochain', '"$NPXS" OR "Pundi X"']

    counts = []
    for i in range(0, len(queries)):
        count_rule = gen_rule_payload(queries[i], count_bucket="day")
        temp = collect_results(count_rule,
                               result_stream_args=premium_search_args)
        print(temp)
        print("\n")
        counts.append(temp[1]['count'])
    print('\n', counts)
    """CryptoCompare"""
    from cryptocompy import price

    avgPrices = []
    toCurr = 'USD'
    yesterday = date.today() - timedelta(1)
    datestr = str(yesterday) + ' 00:00:00'

    for elem in nameList:
        # avgtemp = price.get_day_average_price(elem[0], toCurr)[elem[0]]['USD']
        # avgPrices.append(avgtemp)
        eodtemp = price.get_historical_eod_price(elem[0],
                                                 toCurr,
                                                 datestr,
                                                 try_conversion=True)
        eodtemp = eodtemp[elem[0]][toCurr]
        avgPrices.append(eodtemp)

    plot(counts, avgPrices, nameList)
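As a rough illustration of the data this loop works with: each element returned for a counts rule is a bucket dictionary containing at least a 'count' field (indexed above as temp[1]['count']) and, per the counts endpoint, a 'timePeriod' field. A small sketch under that assumption:

# Sketch only; assumes each bucket looks like {'timePeriod': '201901010000', 'count': 1234}.
daily_counts = {bucket['timePeriod']: bucket['count'] for bucket in temp}
total_mentions = sum(bucket['count'] for bucket in temp)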
Example No. 7
def search_lima(search, premium_search_args):
    # rule = gen_rule_payload(search + " point_radius:[-12.089282 -77.020041 10mi]", results_per_call=100)
    rule = gen_rule_payload(search + " place:Peru", results_per_call=100)
    data = collect_results(rule,
                           max_results=100,
                           result_stream_args=premium_search_args)
    return data
Example No. 8
    def arquive_search(self,
                       query,
                       start,
                       end,
                       dev_env,
                       max_size=2500,
                       max_call=100):
        self.settings['search_tweets_api']['endpoint'] =\
           f"https://api.twitter.com/1.1/tweets/search/fullarchive/{dev_env}.json"

        # Write the updated settings first so archive_keys.yaml contains the
        # full-archive endpoint before the credentials are loaded from it.
        with open('archive_keys.yaml', 'w') as config_file:
            yaml.dump(self.settings, config_file, default_flow_style=False)

        credentials = load_credentials("archive_keys.yaml",
                                       yaml_key="search_tweets_api",
                                       env_overwrite=False)

        q_rule = gen_rule_payload(query,
                                  results_per_call=max_call,
                                  from_date=start,
                                  to_date=end)

        rs = ResultStream(rule_payload=q_rule,
                          max_results=max_size,
                          **credentials)

        with open('tweet_data_archive.csv', 'a', encoding='utf-8') as file:
            n = 0
            for tweet in rs.stream():
                n += 1
                if n % (max_size // 10) == 0:
                    print('{0}: {1}'.format(str(n), tweet['created_at']))
                json.dump(tweet, file)
                file.write('\n')
Example No. 9
def count_tweets(query,
                 from_date,
                 to_date,
                 credentials_path,
                 yaml_key,
                 count_bucket="day",
                 results_per_call=500,
                 verbose=False,
                 **kwargs):
    """
    Returns the number of existing Tweets for a given query and time
    frame. Since this function doesn't pull tweets, this is a safe option
    to check the effectiveness of your filters without exhausting the
    API's capacity.

    Parameters
    ----------
    query : str
        Query passed to the Twitter API to fetch Tweets.
    from_date : str or None
        Date format as specified by `convert_utc_time` for the starting time
        of your search.
    to_date : str or None
        Date format as specified by `convert_utc_time` for the end time of
        your search.
    credentials_path : str
        Path for the yaml file with the Twitter API credentials.
    yaml_key : str
        Key within the yaml file containing the Twitter API credentials to be
        used.
    count_bucket : str or None, default="day"
        If using the counts api endpoint, will define the count bucket for
        which tweets are aggregated.
    results_per_call : int, default=500
        Number of Tweets returned per call.
    verbose : int or bool, default=False
        Controls the verbosity when pulling the tweet count.

    Returns
    -------
    counts : dict
        Number of existing tweets for each bucket.
    """

    logger = logging.getLogger(__name__)
    logger.propagate = verbose
    logger.info('Counting Tweets')

    search_args = load_credentials(credentials_path, yaml_key=yaml_key)

    count_rule = gen_rule_payload(query,
                                  from_date=from_date,
                                  to_date=to_date,
                                  count_bucket=count_bucket,
                                  results_per_call=results_per_call)

    counts = collect_results(count_rule, result_stream_args=search_args)

    return counts
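A minimal usage sketch for count_tweets; the query, dates, credentials path, and yaml_key below are placeholders that should point at your own setup:

# Hypothetical call; adjust query, window, and credentials to your own configuration.
buckets = count_tweets(query="wildfire lang:en",
                       from_date="2020-01-01",
                       to_date="2020-02-01",
                       credentials_path="~/.twitter_keys.yaml",
                       yaml_key="search_tweets_api",
                       count_bucket="day")
total = sum(bucket["count"] for bucket in buckets)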
Example No. 10
    def get_rule_count(self):
        """ before calling the production api, get a count of the tweets that match the rule """
        rule_count = gen_rule_payload(self.raw_rule,
                                      from_date=self.from_date, to_date=self.to_date,
                                      results_per_call=500, count_bucket='day')

        counts_list = collect_results(rule_count, max_results=500, result_stream_args=self.premium_search_args)
        [print(count) for count in counts_list]
Example No. 11
def use_premium(search, filename, from_date, to_date, endpoint='full'):
    '''
    Collect historical tweets
    '''
    if endpoint == '30day':
        endpoint_key = 'search_premium_30day_api'
        #endpoint_key = 'search_lynxx_30day_api'
    else:
        endpoint_key = 'search_premium_full_api'
        #endpoint_key = 'search_lynxx_full_api'

    try:
        tweet_df = pd.read_csv(filename, dtype=str, encoding='ISO-8859-1')
    except FileNotFoundError:
        tweet_df = pd.DataFrame()

    # Extract the credentials for the endpoint.
    search_stream = load_credentials(filename='./credentials.yaml',
                                     yaml_key=endpoint_key,
                                     env_overwrite=False)

    # Collect tweets while we are permitted.
    # Todo: still don't know how to catch the retry-limit error.
    while to_date > from_date:

        rule = gen_rule_payload(search,
                                from_date=from_date,
                                to_date=to_date,
                                results_per_call=100)
        try:
            tweets = collect_results(rule,
                                     max_results=2000,
                                     result_stream_args=search_stream)
        except:
            break

        for idx, tweet in enumerate(tweets):
            tweet_df = tweet_df.append([json_normalize(tweet)],
                                       ignore_index=True,
                                       sort=False)

            if idx % 1000 == 0:
                print(f'{tweet["created_at"]}: {tweet["text"]}')
                tweet_df.to_csv(filename, index=False)

        tweet_df['created_at'] = pd.to_datetime(tweet_df['created_at'],
                                                utc=True)
        # Step the search window back to one hour before the earliest tweet collected so far.
        mindate = min(tweet_df['created_at']) - timedelta(hours=1)
        to_date = mindate.strftime('%Y-%m-%d %H:%M')

    tweet_df['created_at'] = pd.to_datetime(tweet_df['created_at'])
    min(tweet_df['created_at'])

    tweet_df.drop_duplicates(subset=['created_at', 'user.screen_name'],
                             keep='first',
                             inplace=True)
    tweet_df.sort_values(by='created_at', inplace=True)
    tweet_df.to_csv(filename, index=False)
Example No. 12
    def tw_get_premium_search(self, keyword: str):
        with open(f'datasets/tw_{keyword.lower()}_searches_premium.json',
                  'w') as f:
            try:
                f.write('{"statuses": [')

                # Note: this first rule is immediately replaced by the
                # place-based rule below; it is left here as in the original.
                rule = gen_rule_payload(
                    pt_rule="near:\"New York, NY\" within:50mi",
                    results_per_call=100,
                    from_date="2018-07-01",
                    to_date="2018-10-01")

                rule = gen_rule_payload(
                    pt_rule="place:\"New York, NY\"",
                    results_per_call=100,
                    from_date=(datetime.date.today() -
                               datetime.timedelta(31)).isoformat(),
                    to_date=datetime.date.today().isoformat())

                next_token = None
                while True:
                    results = ResultStream(rule_payload=rule,
                                           **self.twitter_premium_api)
                    results.next_token = next_token

                    tweets = []

                    try:
                        tweets = list(results.stream())
                    except Exception as ex:
                        print(str(ex))

                    for tweet in tweets:
                        f.write("%s," % json.dumps(tweet))

                    if results.next_token is None:
                        break
                    else:
                        next_token = results.next_token

                # If pagination occurred, step back over the trailing comma before closing the array.
                if next_token is not None:
                    f.seek(f.tell() - 1, os.SEEK_SET)
                f.write("]}")

            except Exception as ex:
                print("Error:\n" + str(ex))
Example No. 13
def _download_tweets(trend, enterprise_search_args):
    powertrack_rule = '(has:geo OR has:profile_geo) lang:en -is:retweet %s' % trend
    rule = gen_rule_payload(powertrack_rule, results_per_call=500)
    rs = ResultStream(rule_payload=rule,
                      max_requests=2,
                      **enterprise_search_args)
    for tweet in rs.stream():
        print(tweet)
        _store_tweet(tweet)
Example No. 14
def get_tweets(trend, date):
    enddate = date + datetime.timedelta(days=1)
    username = "******"
    password = "******"
    endpoint = "https://gnip-api.twitter.com/search/fullarchive/accounts/greg-students/prod.json"
    bearer_token = ""
    rule = gen_rule_payload(trend + " lang:en",
                            from_date=date.isoformat(),
                            to_date=enddate.isoformat(),
                            results_per_call=500)
    rs = ResultStream(rule_payload=rule,
                      max_results=10000,
                      max_pages=10,
                      username=username,
                      endpoint=endpoint,
                      password=password)
    #tweets=collect_results(rule, result_stream_args=args,max_results=20000)
    return rs
Example No. 15
 def __init__(self, search_query):
     print(self.__class__.__name__)
     self.premium_search_args = searchtweets.load_credentials()
     self.rule = searchtweets.gen_rule_payload(
         search_query.query,
         to_date=(datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d'))
     try:
         self.iter = iter(
             searchtweets.collect_results(
                 self.rule, result_stream_args=self.premium_search_args))
     except Exception:
         self.iter = iter([])
Example No. 16
def read_tweets(term):
    """
    @return: string output split into 2000-character messages.
    """
    rule = gen_rule_payload(
        term, results_per_call=100)  # testing with a sandbox account
    print(rule)
    tweets = collect_results(rule, 100, premium_search_args())
    print(tweets[:10])
    output = '\n\n'.join(
        [f'@{t.screen_name}: {t.all_text}' for t in tweets[:10]])
    output = split_2000(output)
    return output
Example No. 17
def get_premium_tweets(candidate, handle, topic):
    rule = gen_rule_payload(topic + " to:" + handle, results_per_call=100)
    tweets = collect_results(rule,
                             max_results=100,
                             result_stream_args=premium_search_args)
    data = TweetHelpers.get_tweet_objects(candidate, topic)
    if os.path.exists(candidate + "/" + topic + "/raw/tweets.pkl"):
        os.remove(candidate + "/" + topic + "/raw/tweets.pkl")
    if data is None:
        data = []
    data += tweets
    TweetHelpers.pickle_data(candidate + "/" + topic + "/raw/tweets.pkl",
                             data)
Example No. 18
 def make_rule(handle, to_date, from_date, results_per_call):
     """
     Inputs:
         - handle (should be changed to id)
         - to_date
     """
     #print('Using',results_per_call,' results per call. Should be 100 for sandbox, 500 for premium')
     _rule_a = "from:" + handle
     rule = gen_rule_payload(_rule_a,
                             from_date=from_date,
                             to_date=to_date,
                             results_per_call=results_per_call)
     return rule
Example No. 19
def _download_tweets(trend):
    powertrack_rule = '%s (has:geo OR has:profile_geo) lang:en -is:retweet' % trend
    rule = gen_rule_payload(powertrack_rule,
                            results_per_call=500,
                            to_date=None,
                            from_date='201207220000')
    logging.info("PowerTrack rule: %s" % rule)
    rs = ResultStream(rule_payload=rule,
                      max_results=500,
                      max_requests=1,
                      **enterprise_search_args)
    for tweet in rs.stream():
        _push_tweet(tweet, trend)
Example No. 20
 def get_emoji_tweets(self, emoji_list):
     emoji_list = ' OR '.join(emoji_list)
     print(emoji_list)
     max_tweets = 100
     rule = searchtweets.gen_rule_payload(
         emoji_list,
         # from_date="2017-01-01", #UTC 2017-09-01 00:00
         # to_date="2019-02-12",#UTC 2017-10-30 00:00
         results_per_call=max_tweets)
     print(rule)
     tweets = searchtweets.collect_results(
         rule, max_results=500, result_stream_args=self.premium_search_args)
     return tweets
Example No. 21
 def get_premium_all_tweets(self):
     rule_str = "from:" + self.twitter_user.screen_name
     print("get_all_twitter_user_tweets: rule_str: " + rule_str)
     rule = gen_rule_payload(rule_str)
     tweets_parsed = []
     try:
         tweets = collect_results(rule, max_results=100, result_stream_args=search_args)
         print("tweets len:" + str(len(tweets)))
         for t in tweets:
             tweets_parsed.append(twitter.Status().NewFromJsonDict(t))
         print("tweets_parsed len:" + str(len(tweets_parsed)))
         self.place.add_tweet_list(tweets_parsed, self.user, self.region, old_user=True)
     except Exception as exc:
         print("In get_all_twitter_user_tweets, Problem loading tweets")
         print(exc)
     return tweets_parsed
Example No. 22
    def collect_and_write_tweets(self,
                                 query: str,
                                 results_per_call: int = 100,
                                 num_tweets: int = 100,
                                 from_date: datetime.date = None,
                                 to_date: datetime.date = None):
        """
        :param query: search rule passed to gen_rule_payload
        :param results_per_call: Tweets returned per request (capped at 100 on the sandbox tier)
        :param num_tweets: maximum number of Tweets to collect
        :param from_date: optional start of the search window
        :param to_date: optional end of the search window
        :return: None; results are merged into self.tweets_df and written to csv
        """

        if results_per_call > 100:
            print(
                "Sandbox API limited to 100 results per request, cannot retrieve {} results"
                .format(results_per_call))

        rule = gen_rule_payload(query,
                                results_per_call=results_per_call,
                                from_date=from_date.isoformat() if from_date else None,
                                to_date=to_date.isoformat() if to_date else None)

        tweets = collect_results(rule,
                                 max_results=num_tweets,
                                 result_stream_args=self.premium_search_args)

        # cast tweet objects to dict and create pandas data frame
        tweets_dict_list = [dict(tweet) for tweet in tweets]
        tweets_df = pd.DataFrame(tweets_dict_list)
        tweets_df.index = tweets_df.id

        try:
            # write new data set to .csv file without duplicates
            self.tweets_df = pd.concat([self.tweets_df, tweets_df],
                                       axis=0,
                                       join='outer')
            self.tweets_df = self.tweets_df[~self.tweets_df.index.duplicated()]
            self.tweets_df.to_csv("{}_tweets.csv".format(self.topic))
        except:
            # save backup of collected tweets
            tweets_df.to_csv("{}_{}_{}_backup_tweets.csv".format(
                self.topic,
                datetime.datetime.now().date(),
                datetime.datetime.now().time()))
Example No. 23
def fullarchivetweetsearch(event, context):
    data = json.loads(event['body'])
    screen_name = data['screenname']
    hash_tag = data['hashtag']
    from_past_number_of_days = data['numberofDays']

    """
        Call the method to get the access token
    """
    access_token = app_only_oauth_access_token(os.environ['CONSUMER_KEY'], os.environ['CONSUMER_SECRET'])

    from_to_dates = get_tweet_time_window(from_past_number_of_days)
    """
       Generate the rule criteria to filter the tweets
    """
    rule = gen_rule_payload("from:" + screen_name + " lang:en " + hash_tag,
                            from_date=str(from_to_dates['from_date']),
                            to_date=str(from_to_dates['to_date']),
                            results_per_call=100)
    print("rule:", rule)

    search_args = {
        "bearer_token": access_token,
        "endpoint": os.environ['FULLARCHIVE_TWEETSEARCH_ENDPOINT']}

    """
        calling the twitter api
    """
    tweets_list = collect_results(rule,
                                  max_results=100,
                                  result_stream_args=search_args)
    appended_tweets = []
    """
        Iterating the twitter search response
    """
    for tweet in tweets_list:
        appended_tweets.append(str(tweet.created_at_datetime) + " " + tweet.text)

    json_response = {
        "Given Hashtag": hash_tag,
        "Given TwitterAccount": screen_name,
        "Tweet count": str(len(tweets_list)),
        "Tweet Text": appended_tweets
    }
    output = {'statusCode': 200, 'body': json.dumps(json_response)}
    return output
Example No. 24
def get_tweets(keyword,
               limit='100',
               begin_date=datetime.now().strftime('%Y-%m-%d'),
               end_date=datetime.now().strftime('%Y-%m-%d'),
               lang='id'):
    query = keyword + ' lang:' + lang

    rule = gen_rule_payload(query,
                            from_date=begin_date,
                            to_date=end_date,
                            results_per_call=500)

    tweets = collect_results(rule,
                             max_results=500,
                             result_stream_args=search_args)

    return [tweet.all_text for tweet in tweets]
Example No. 25
def auth(dates):
    premium_args = load_credentials(filename="credentials.yaml",
                                    yaml_key='search_tweets_api_dev',
                                    env_overwrite=False)
    # Change the below string to the candidate you're looking for info on. Don't remove the lang:en otherwise you'll
    # get results in any language
    queryString = 'Donald Trump lang:en'
    rule = gen_rule_payload(queryString,
                            results_per_call=100,
                            from_date=dates[0],
                            to_date=dates[1])
    print(rule)
    tweets = collect_results(rule,
                             max_results=100,
                             result_stream_args=premium_args)
    [print(tweet.all_text) for tweet in tweets]
    return tweets, queryString
Example No. 26
def collect_tweets(query, from_date, to_date, results_per_call, max_results,
                   premium_search_args):
    # query: rule passed to the Twitter API, e.g. query='bitcoin' to collect tweets about bitcoin
    # results_per_call is capped at 100 for a sandbox account; collect_results handles the
    # 'next' token internally, so e.g. max_results=500 works without any extra code
    # date format: 'YYYY-mm-DD HH:MM' strings, converted automatically by convert_utc_time,
    # e.g. '2019-09-09' -> '201909090000'
    # from_date is inclusive, to_date is non-inclusive; results are returned newest-first,
    # starting at to_date and working backwards towards from_date
    collect_rule = gen_rule_payload(pt_rule=query,
                                    results_per_call=results_per_call,
                                    from_date=from_date,
                                    to_date=to_date)
    print(collect_rule)
    collected_tweets = collect_results(collect_rule,
                                       max_results=max_results,
                                       result_stream_args=premium_search_args)
    return collected_tweets
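A minimal usage sketch for collect_tweets, assuming premium_search_args was obtained from load_credentials as in the other examples (the query and date window are placeholders):

# Hypothetical call; adjust the query, dates, and credentials to your setup.
tweets = collect_tweets(query='bitcoin lang:en',
                        from_date='2019-09-01',
                        to_date='2019-09-09',
                        results_per_call=100,
                        max_results=500,
                        premium_search_args=premium_search_args)
print(len(tweets), 'tweets collected')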
Example No. 27
def get_data(search_query, api_key, secret_key, to_date, from_date, filename):
    """Get Twitter data through the full-archive search sandbox API and write every tweet
    to a JSONL file, based on:
     - the search term,
     - the geographic location of interest,
     - the time period of interest,
     - and personal Twitter account credentials.

     Reference: https://github.com/geduldig/TwitterAPI/tree/master/TwitterAPI
     Reference: https://developer.twitter.com/en/docs/tweets/search/overview
    """
    print_after_x = 1000
    config = dict(
        search_tweets_api=dict(
            account_type='premium',
            endpoint="https://api.twitter.com/1.1/tweets/search/fullarchive/mangroveConservation.json",
            consumer_key=api_key,
            consumer_secret=secret_key
        )
    )
    with open('twitter_keys.yaml', 'w') as config_file:
        yaml.dump(config, config_file, default_flow_style=False)
    from searchtweets import load_credentials, gen_rule_payload, ResultStream

    premium_search_args = load_credentials("twitter_keys.yaml",
                                           yaml_key="search_tweets_api",
                                           env_overwrite=False)
    rule = gen_rule_payload(search_query,
                            results_per_call=100,
                            from_date=from_date,
                            to_date=to_date
                            )
    temp = ResultStream(rule_payload=rule,
                        max_results=100000,
                        **premium_search_args)
    with open(filename, 'a', encoding='utf-8') as temp_file:
        num = 0
        for tweet in temp.stream():
            num += 1
            if num % print_after_x == 0:
                print('{0}: {1}'.format(str(num), tweet['created_at']))
            json.dump(tweet, temp_file)
            temp_file.write('\n')
    print('done')
Example No. 28
 def createTestData(search_string):
     try:
         print('Start Fetching')
         #print(date,nextdate)
         rule = gen_rule_payload(search_string,
                                 from_date="2019-05-18",
                                 to_date="2019-05-20",
                                 results_per_call=500)

         alltweets = collect_results(rule,
                                     max_results=500,
                                     result_stream_args=premium_search_args)
         print("data fetched")

         return alltweets

     except Exception as exc:
         print("error:", exc)
def save_old_tweets():
    from searchtweets import load_credentials, gen_rule_payload, ResultStream
    import json

    premium_search_args = load_credentials("twitter_keys_fullarchive.yaml",
                                           yaml_key="search_tweets_api",
                                           env_overwrite=False)

    query = "from:NTOO_Org"
    rule = gen_rule_payload(query, results_per_call=100)

    rs = ResultStream(rule_payload=rule,
                      max_results=1000,
                      **premium_search_args)

    with open('fullTweetsData.json', 'a', encoding='utf-8') as f:
        for tweet in rs.stream():
            json.dump(tweet, f)
            f.write('\n')
def read_stream(apiscope, label):
    API_KEY = api_key
    API_SECRET_KEY = api_secret_key
    DEV_ENVIRONMENT_LABEL = label
    API_SCOPE = apiscope  # 'fullarchive' for full archive, '30day' for the last 31 days

    SEARCH_QUERY = 'delays, @WestMidRailway OR @NetworkRailBHM OR @networkrail'
    RESULTS_PER_CALL = 100  # 100 for sandbox, 500 for paid tiers
    TO_DATE = '2021-01-30'  # format YYYY-MM-DD HH:MM (hour and minutes optional)
    FROM_DATE = '2021-01-01'  # format YYYY-MM-DD HH:MM (hour and minutes optional)

    MAX_RESULTS = 10000  # Number of Tweets you want to collect

    # --------------------------- STOP -------------------------------#
    # Don't edit anything below, if you don't know what you are doing.
    # --------------------------- STOP -------------------------------#

    config = dict(search_tweets_api=dict(
        account_type='premium',
        endpoint=f"https://api.twitter.com/1.1/tweets/search/{API_SCOPE}/{DEV_ENVIRONMENT_LABEL}.json",
        consumer_key=API_KEY,
        consumer_secret=API_SECRET_KEY))

    with open('twitter_keys.yaml', 'w') as config_file:
        yaml.dump(config, config_file, default_flow_style=False)

    premium_search_args = load_credentials("twitter_keys.yaml",
                                           yaml_key="search_tweets_api",
                                           env_overwrite=False)

    rule = gen_rule_payload(SEARCH_QUERY,
                            results_per_call=RESULTS_PER_CALL,
                            from_date=FROM_DATE,
                            to_date=TO_DATE)

    rs = ResultStream(rule_payload=rule,
                      max_results=MAX_RESULTS,
                      **premium_search_args)

    return rs
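Since read_stream only builds and returns the ResultStream, a short hypothetical driver for it could look like this; api_key and api_secret_key are assumed to be defined at module level as the function expects, and the scope and label below are placeholders:

# Hypothetical consumer of read_stream(); writes each tweet as one JSON line.
import json

rs = read_stream('30day', 'my_dev_env')
with open('tweets.jsonl', 'a', encoding='utf-8') as out:
    for tweet in rs.stream():  # stream() lazily pages through the results
        json.dump(tweet, out)
        out.write('\n')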