Example No. 1
    def create_search_payload(self):
        if self.do_sandbox:
            self.rule = gen_rule_payload(self.raw_rule, results_per_call=100,
                                         from_date=self.from_date, to_date=self.to_date)

        else:
            self.rule = gen_rule_payload(self.raw_rule, results_per_call=500,
                                         from_date=self.from_date, to_date=self.to_date)
Example No. 2
def get_file(aname,
             cak,
             cask,
             etype,
             hashtag,
             keywords,
             fdate='00-00-0000',
             tdate='00-00-0000',
             ftime='00:00',
             ttime='00:00'):

    if etype == 'efa':  # Full archive scraping (refer to limits on README)
        endp = 'https://api.twitter.com/1.1/tweets/search/fullarchive/' + aname + '.json'
    elif etype == 'tdays':  # 30 days scraping (refer to limits on README)
        endp = 'https://api.twitter.com/1.1/tweets/search/30day/' + aname + '.json'
    else:
        # Fail fast on an unknown endpoint type instead of building a bogus config.
        raise ValueError("etype must be 'efa' (full archive) or 'tdays' (30-day)")

    # Creating a yaml credentials file
    config = dict(search_tweets_api=dict(account_type='premium',
                                         endpoint=endp,
                                         consumer_key=cak,
                                         consumer_secret=cask))

    with open('C:\\Users\\Samuktha\\Documents\\USC\\twitter\\proj\\cred.yaml',
              'w') as config_file:
        yaml.dump(config, config_file, default_flow_style=False)

    # loading credentials
    premium_search_args = load_credentials(
        'C:\\Users\\Samuktha\\Documents\\USC\\twitter\\proj\\cred.yaml',
        yaml_key='search_tweets_api',
        env_overwrite=True)
    print(premium_search_args)

    if etype == 'efa':
        rule = gen_rule_payload(
            results_per_call=100,
            from_date=fdate + ' ' + ftime,  #"2019-07-06 01:00",
            to_date=tdate + ' ' + ttime,  #"2019-07-06 02:15",
            pt_rule=keywords,
        )
    else:
        rule = gen_rule_payload(results_per_call=100, pt_rule=keywords)

    # result stream

    rs = ResultStream(rule_payload=rule, max_results=50, **premium_search_args)

    return rs
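For orientation, a minimal way to consume the ResultStream returned by get_file might look like the sketch below. This is a hypothetical usage example: the account name, keys, query, and dates are placeholders, and the credentials file path is the hardcoded one used inside get_file.

# Hypothetical driver for get_file(); all values shown are placeholders.
rs = get_file(aname='my_dev_env',
              cak='CONSUMER_KEY',
              cask='CONSUMER_SECRET',
              etype='efa',
              hashtag='#flood',
              keywords='flood lang:en',
              fdate='2019-07-06', tdate='2019-07-07',
              ftime='01:00', ttime='02:15')
for tweet in rs.stream():  # ResultStream.stream() pages through results lazily
    print(tweet['created_at'])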
Example No. 3
def searchtweets_query(file_name='../../Twitter_cred/full_arch_2007-2020.txt'):

    premium_search_args = load_credentials(
        filename="~/.twitter_keys.yaml",
        yaml_key="search_tweets_fullarchive_dev",
        env_overwrite=False)

    print(premium_search_args)
    print()

    #query = "(nat OR natte OR water OR wateroverlast OR regen OR storm OR blank OR bui OR overstroming OR hoosbui OR schade OR noodweer OR wolkbreuk OR waterschade) has:geo place_country:NL"
    query = "(wateroverlast OR overstroming OR waterschade) has:geo place_country:NL"

    do_query = False

    if do_query:
        from_date = "2007-01-01"
        to_date = "2020-01-01"
        rule = gen_rule_payload(query,
                                results_per_call=500,
                                from_date=from_date,
                                to_date=to_date)
        tweets = collect_results(rule,
                                 max_results=500 * 50,
                                 result_stream_args=premium_search_args
                                 )  # change this if you need to
        for tweet in tweets:
            with open(file_name, 'a') as fp:
                fp.write(json.dumps(tweet) + '\n')
    else:
        print(
            "No query was done. To perform a Twitter query, set do_query to True in Twitter/searchtweets_query.py"
        )
Example No. 4
def get_tweets(query_set, twitter_args, query_filter=None):
    tweets_list = list()
    params = c.TWITTER_PARAMS
    for query in query_set:
        curr_month = "{}-{}".format(dt.now().year,
                                    format_date_str(dt.now().month))
        _, curr_usage = get_twitter_api_usage(curr_month)
        if curr_usage >= 24999:
            print("Twitter API limit is about to exceed! Returning now ...\n")
            break
        if query_filter:
            q = '("{}") {}'.format(query, query_filter)
        else:
            q = "{}".format(query)
            print("No filter/Filter in query_set: {}".format(q))
        print("Collecting for {}".format(q))
        try:
            rule = gen_rule_payload(
                q, results_per_call=params["RESULTS_PER_CALL"])
            tweets = collect_results(rule,
                                     max_results=params["MAX_RESULTS"],
                                     result_stream_args=twitter_args)
            print("number of tweets: {}".format(len(tweets)))
            update_twitter_api_usage(curr_month, len(tweets))
            tweets_list.append(tweets)

        except Exception as e:
            print("Exception occurred while fetching tweets: {}".format(e))
            break
    return tweets_list
Example No. 5
    def premium_set_search_params(self,
                                  search_query,
                                  from_date,
                                  to_date,
                                  no_retweets=True,
                                  results_per_call=500):
        """
        Sets the Search Query and maximum Tweets
        to be retrieved to save Quota
        """

        # Set a static Language Filter for English Tweets
        lang_filter = ' lang:en'
        if no_retweets:
            rt_filter = ' -is:retweet'
            # Adds an ignore Retweets tag to the (Altcoin) Query
            self.query = search_query + lang_filter + rt_filter
        else:
            # This Query includes all Tweets, also Retweets
            self.query = search_query + lang_filter
        # Sets the Rule for the Query to be executed (time frame & # of Results)
        self.rule = gen_rule_payload(self.query,
                                     results_per_call=results_per_call,
                                     from_date=from_date,
                                     to_date=to_date)
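A hypothetical call of premium_set_search_params, assuming an instance of the surrounding class is available (here called client; the query and date window are placeholders):

# Hypothetical usage; 'client' stands for an instance of the class defining this method.
client.premium_set_search_params(search_query='$BTC OR Bitcoin',
                                 from_date='2021-01-01',
                                 to_date='2021-01-31',
                                 no_retweets=True,
                                 results_per_call=500)
# client.query now holds the final query string and client.rule the generated rule payload.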
Example No. 6
def counts(queries, nameList):
    # premium_search_args = load_credentials(filename="twitter_keys.yaml", yaml_key="search_tweets_api", env_overwrite=False)
    # queries = ['"$LTC" OR "Litecoin"','"$ETH" OR "Ethereum"','"$BTC" OR "Bitcoin"', 'Holochain', '"$NPXS" OR "Pundi X"']

    counts = []
    for i in range(0, len(queries)):
        count_rule = gen_rule_payload(queries[i], count_bucket="day")
        temp = collect_results(count_rule,
                               result_stream_args=premium_search_args)
        print(temp)
        print("\n")
        counts.append(temp[1]['count'])
    print('\n', counts)
    """CryptoCompare"""
    from cryptocompy import price

    avgPrices = []
    toCurr = 'USD'
    yesterday = date.today() - timedelta(1)
    datestr = str(yesterday) + ' 00:00:00'

    for elem in nameList:
        # avgtemp = price.get_day_average_price(elem[0], toCurr)[elem[0]]['USD']
        # avgPrices.append(avgtemp)
        eodtemp = price.get_historical_eod_price(elem[0],
                                                 toCurr,
                                                 datestr,
                                                 try_conversion=True)
        eodtemp = eodtemp[elem[0]][toCurr]
        avgPrices.append(eodtemp)

    plot(counts, avgPrices, nameList)
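As a rough illustration of the data this loop works with: each element returned for a counts rule is a bucket dictionary containing at least a 'count' field (indexed above as temp[1]['count']) and, per the counts endpoint, a 'timePeriod' field. A small sketch under that assumption:

# Sketch only; assumes each bucket looks like {'timePeriod': '201901010000', 'count': 1234}.
daily_counts = {bucket['timePeriod']: bucket['count'] for bucket in temp}
total_mentions = sum(bucket['count'] for bucket in temp)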
Example No. 7
def search_lima(search, premium_search_args):
    # rule = gen_rule_payload(search + " point_radius:[-12.089282 -77.020041 10mi]", results_per_call=100)
    rule = gen_rule_payload(search + " place:Peru", results_per_call=100)
    data = collect_results(rule,
                           max_results=100,
                           result_stream_args=premium_search_args)
    return data
Example No. 8
    def arquive_search(self,
                       query,
                       start,
                       end,
                       dev_env,
                       max_size=2500,
                       max_call=100):
        self.settings['search_tweets_api']['endpoint'] =\
           f"https://api.twitter.com/1.1/tweets/search/fullarchive/{dev_env}.json"

        # Write the updated settings first so archive_keys.yaml contains the
        # full-archive endpoint before the credentials are loaded from it.
        with open('archive_keys.yaml', 'w') as config_file:
            yaml.dump(self.settings, config_file, default_flow_style=False)

        credentials = load_credentials("archive_keys.yaml",
                                       yaml_key="search_tweets_api",
                                       env_overwrite=False)

        q_rule = gen_rule_payload(query,
                                  results_per_call=max_call,
                                  from_date=start,
                                  to_date=end)

        rs = ResultStream(rule_payload=q_rule,
                          max_results=max_size,
                          **credentials)

        with open('tweet_data_archive.csv', 'a', encoding='utf-8') as file:
            n = 0
            for tweet in rs.stream():
                n += 1
                if n % (max_size // 10) == 0:
                    print('{0}: {1}'.format(str(n), tweet['created_at']))
                json.dump(tweet, file)
                file.write('\n')
Example No. 9
def count_tweets(query,
                 from_date,
                 to_date,
                 credentials_path,
                 yaml_key,
                 count_bucket="day",
                 results_per_call=500,
                 verbose=False,
                 **kwargs):
    """
    Returns the number of existing Tweets for a given query and time
    frame. Since this function doesn't pull tweets, this is a safe option
    to check the effectiveness of your filters without exhausting the
    API's capacity.

    Parameters
    ----------
    query : str
        Query passed to the Twitter API to fetch Tweets.
    from_date : str or None
        Date format as specified by `convert_utc_time` for the starting time
        of your search.
    to_date : str or None
        Date format as specified by `convert_utc_time` for the end time of
        your search.
    credentials_path : str
        Path for the yaml file with the Twitter API credentials.
    yaml_key : str
        Key within the yaml file containing the Twitter API credentials to be
        used.
    count_bucket : str or None, default="day"
        If using the counts api endpoint, will define the count bucket for
        which tweets are aggregated.
    results_per_call : int, default=500
        Number of Tweets returned per call.
    verbose : int or bool, default=False
        Controls the verbosity when pulling the tweet count.

    Returns
    -------
    counts : dict
        Number of existing tweets for each bucket.
    """

    logger = logging.getLogger(__name__)
    logger.propagate = verbose
    logger.info('Counting Tweets')

    search_args = load_credentials(credentials_path, yaml_key=yaml_key)

    count_rule = gen_rule_payload(query,
                                  from_date=from_date,
                                  to_date=to_date,
                                  count_bucket=count_bucket,
                                  results_per_call=results_per_call)

    counts = collect_results(count_rule, result_stream_args=search_args)

    return counts
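A minimal usage sketch for count_tweets; the query, dates, credentials path, and yaml_key below are placeholders that should point at your own setup:

# Hypothetical call; adjust query, window, and credentials to your own configuration.
buckets = count_tweets(query="wildfire lang:en",
                       from_date="2020-01-01",
                       to_date="2020-02-01",
                       credentials_path="~/.twitter_keys.yaml",
                       yaml_key="search_tweets_api",
                       count_bucket="day")
total = sum(bucket["count"] for bucket in buckets)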
Example No. 10
    def get_rule_count(self):
        """ before calling the production api, get a count of the tweets that match the rule """
        rule_count = gen_rule_payload(self.raw_rule,
                                      from_date=self.from_date, to_date=self.to_date,
                                      results_per_call=500, count_bucket='day')

        counts_list = collect_results(rule_count, max_results=500, result_stream_args=self.premium_search_args)
        [print(count) for count in counts_list]
Example No. 11
def use_premium(search, filename, from_date, to_date, endpoint='full'):
    '''
    Collect historical tweets
    '''
    if endpoint == '30day':
        endpoint_key = 'search_premium_30day_api'
        #endpoint_key = 'search_lynxx_30day_api'
    else:
        endpoint_key = 'search_premium_full_api'
        #endpoint_key = 'search_lynxx_full_api'

    try:
        tweet_df = pd.read_csv(filename, dtype=str, encoding='ISO-8859-1')
    except FileNotFoundError:
        tweet_df = pd.DataFrame()

    # Extract the credentials for the endpoint.
    search_stream = load_credentials(filename='./credentials.yaml',
                                     yaml_key=endpoint_key,
                                     env_overwrite=False)

    # Collect tweets while we are permitted.
    # Todo: still don't know how to catch the retry-limit error.
    while to_date > from_date:

        rule = gen_rule_payload(search,
                                from_date=from_date,
                                to_date=to_date,
                                results_per_call=100)
        try:
            tweets = collect_results(rule,
                                     max_results=2000,
                                     result_stream_args=search_stream)
        except:
            break

        for idx, tweet in enumerate(tweets):
            tweet_df = tweet_df.append([json_normalize(tweet)],
                                       ignore_index=True,
                                       sort=False)

            if idx % 1000 == 0:
                print(f'{tweet["created_at"]}: {tweet["text"]}')
                tweet_df.to_csv(filename, index=False)

        tweet_df['created_at'] = pd.to_datetime(tweet_df['created_at'],
                                                utc=True)
        # Step the search window back to one hour before the earliest tweet collected so far.
        mindate = min(tweet_df['created_at']) - timedelta(hours=1)
        to_date = mindate.strftime('%Y-%m-%d %H:%M')

    tweet_df['created_at'] = pd.to_datetime(tweet_df['created_at'])
    min(tweet_df['created_at'])

    tweet_df.drop_duplicates(subset=['created_at', 'user.screen_name'],
                             keep='first',
                             inplace=True)
    tweet_df.sort_values(by='created_at', inplace=True)
    tweet_df.to_csv(filename, index=False)
Example No. 12
    def tw_get_premium_search(self, keyword: str):
        with open(f'datasets/tw_{keyword.lower()}_searches_premium.json',
                  'w') as f:
            try:
                f.write('{"statuses": [')

                # Note: this first rule is immediately replaced by the
                # place-based rule below; it is left here as in the original.
                rule = gen_rule_payload(
                    pt_rule="near:\"New York, NY\" within:50mi",
                    results_per_call=100,
                    from_date="2018-07-01",
                    to_date="2018-10-01")

                rule = gen_rule_payload(
                    pt_rule="place:\"New York, NY\"",
                    results_per_call=100,
                    from_date=(datetime.date.today() -
                               datetime.timedelta(31)).isoformat(),
                    to_date=datetime.date.today().isoformat())

                next_token = None
                while True:
                    results = ResultStream(rule_payload=rule,
                                           **self.twitter_premium_api)
                    results.next_token = next_token

                    tweets = []

                    try:
                        tweets = list(results.stream())
                    except Exception as ex:
                        print(str(ex))

                    for tweet in tweets:
                        f.write("%s," % json.dumps(tweet))

                    if results.next_token is None:
                        break
                    else:
                        next_token = results.next_token

                # If pagination occurred, step back over the trailing comma before closing the array.
                if next_token is not None:
                    f.seek(f.tell() - 1, os.SEEK_SET)
                f.write("]}")

            except Exception as ex:
                print("Error:\n" + str(ex))
Example No. 13
def _download_tweets(trend, enterprise_search_args):
    powertrack_rule = '(has:geo OR has:profile_geo) lang:en -is:retweet %s' % trend
    rule = gen_rule_payload(powertrack_rule, results_per_call=500)
    rs = ResultStream(rule_payload=rule,
                      max_requests=2,
                      **enterprise_search_args)
    for tweet in rs.stream():
        print(tweet)
        _store_tweet(tweet)
Example No. 14
def get_tweets(trend, date):
    enddate = date + datetime.timedelta(days=1)
    username = "******"
    password = "******"
    endpoint = "https://gnip-api.twitter.com/search/fullarchive/accounts/greg-students/prod.json"
    bearer_token = ""
    rule = gen_rule_payload(trend + " lang:en",
                            from_date=date.isoformat(),
                            to_date=enddate.isoformat(),
                            results_per_call=500)
    rs = ResultStream(rule_payload=rule,
                      max_results=10000,
                      max_pages=10,
                      username=username,
                      endpoint=endpoint,
                      password=password)
    #tweets=collect_results(rule, result_stream_args=args,max_results=20000)
    return rs
Example No. 15
 def __init__(self, search_query):
     print(self.__class__.__name__)
     self.premium_search_args = searchtweets.load_credentials()
     self.rule = searchtweets.gen_rule_payload(
         search_query.query,
         to_date=(datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d'))
     try:
         self.iter = iter(
             searchtweets.collect_results(
                 self.rule, result_stream_args=self.premium_search_args))
     except Exception:
         self.iter = iter([])
Example No. 16
def read_tweets(term):
    """
    @return: string output split into 2000-character messages.
    """
    rule = gen_rule_payload(
        term, results_per_call=100)  # testing with a sandbox account
    print(rule)
    tweets = collect_results(rule, 100, premium_search_args())
    print(tweets[:10])
    output = '\n\n'.join(
        [f'@{t.screen_name}: {t.all_text}' for t in tweets[:10]])
    output = split_2000(output)
    return output
Example No. 17
def get_premium_tweets(candidate, handle, topic):
    rule = gen_rule_payload(topic + " to:" + handle, results_per_call=100)
    tweets = collect_results(rule,
                             max_results=100,
                             result_stream_args=premium_search_args)
    data = TweetHelpers.get_tweet_objects(candidate, topic)
    if os.path.exists(candidate + "/" + topic + "/raw/tweets.pkl"):
        os.remove(candidate + "/" + topic + "/raw/tweets.pkl")
    if data is None:
        data = []
    data += tweets
    TweetHelpers.pickle_data(candidate + "/" + topic + "/raw/tweets.pkl",
                             data)
Example No. 18
 def make_rule(handle, to_date, from_date, results_per_call):
     """
     Inputs:
         - handle (should be changed to id)
         - to_date
     """
     #print('Using',results_per_call,' results per call. Should be 100 for sandbox, 500 for premium')
     _rule_a = "from:" + handle
     rule = gen_rule_payload(_rule_a,
                             from_date=from_date,
                             to_date=to_date,
                             results_per_call=results_per_call)
     return rule
Example No. 19
def _download_tweets(trend):
    powertrack_rule = '%s (has:geo OR has:profile_geo) lang:en -is:retweet' % trend
    rule = gen_rule_payload(powertrack_rule,
                            results_per_call=500,
                            to_date=None,
                            from_date='201207220000')
    logging.info("PowerTrack rule: %s" % rule)
    rs = ResultStream(rule_payload=rule,
                      max_results=500,
                      max_requests=1,
                      **enterprise_search_args)
    for tweet in rs.stream():
        _push_tweet(tweet, trend)
Example No. 20
 def get_emoji_tweets(self, emoji_list):
     emoji_list = ' OR '.join(emoji_list)
     print(emoji_list)
     max_tweets = 100
     rule = searchtweets.gen_rule_payload(
         emoji_list,
         # from_date="2017-01-01", #UTC 2017-09-01 00:00
         # to_date="2019-02-12",#UTC 2017-10-30 00:00
         results_per_call=max_tweets)
     print(rule)
     tweets = searchtweets.collect_results(
         rule, max_results=500, result_stream_args=self.premium_search_args)
     return tweets
Example No. 21
 def get_premium_all_tweets(self):
     rule_str = "from:" + self.twitter_user.screen_name
     print("get_all_twitter_user_tweets: rule_str: " + rule_str)
     rule = gen_rule_payload(rule_str)
     tweets_parsed = []
     try:
         tweets = collect_results(rule, max_results=100, result_stream_args=search_args)
         print("tweets len:" + str(len(tweets)))
         for t in tweets:
             tweets_parsed.append(twitter.Status().NewFromJsonDict(t))
         print("tweets_parsed len:" + str(len(tweets_parsed)))
         self.place.add_tweet_list(tweets_parsed, self.user, self.region, old_user=True)
     except Exception as exc:
         print("In get_all_twitter_user_tweets, Problem loading tweets")
         print(exc)
     return tweets_parsed
Example No. 22
    def collect_and_write_tweets(self,
                                 query: str,
                                 results_per_call: int = 100,
                                 num_tweets: int = 100,
                                 from_date: datetime.date = None,
                                 to_date: datetime.date = None):
        """
        :param query: search rule passed to gen_rule_payload
        :param results_per_call: Tweets returned per request (capped at 100 on the sandbox tier)
        :param num_tweets: maximum number of Tweets to collect
        :param from_date: optional start of the search window
        :param to_date: optional end of the search window
        :return: None; results are merged into self.tweets_df and written to csv
        """

        if results_per_call > 100:
            print(
                "Sandbox API limited to 100 results per request, cannot retrieve {} results"
                .format(results_per_call))

        rule = gen_rule_payload(query,
                                results_per_call=results_per_call,
                                from_date=from_date.isoformat() if from_date else None,
                                to_date=to_date.isoformat() if to_date else None)

        tweets = collect_results(rule,
                                 max_results=num_tweets,
                                 result_stream_args=self.premium_search_args)

        # cast tweet objects to dict and create pandas data frame
        tweets_dict_list = [dict(tweet) for tweet in tweets]
        tweets_df = pd.DataFrame(tweets_dict_list)
        tweets_df.index = tweets_df.id

        try:
            # write new data set to .csv file without duplicates
            self.tweets_df = pd.concat([self.tweets_df, tweets_df],
                                       axis=0,
                                       join='outer')
            self.tweets_df = self.tweets_df[~self.tweets_df.index.duplicated()]
            self.tweets_df.to_csv("{}_tweets.csv".format(self.topic))
        except:
            # save backup of collected tweets
            tweets_df.to_csv("{}_{}_{}_backup_tweets.csv".format(
                self.topic,
                datetime.datetime.now().date(),
                datetime.datetime.now().time()))
Example No. 23
def fullarchivetweetsearch(event, context):
    data = json.loads(event['body'])
    screen_name = data['screenname']
    hash_tag = data['hashtag']
    from_past_number_of_days = data['numberofDays']

    """
        Call the method to get the access token
    """
    access_token = app_only_oauth_access_token(os.environ['CONSUMER_KEY'], os.environ['CONSUMER_SECRET'])

    from_to_dates = get_tweet_time_window(from_past_number_of_days)
    """
       Generate the rule criteria to filter the tweets
    """
    rule = gen_rule_payload("from:" + screen_name + " lang:en " + hash_tag,
                            from_date=str(from_to_dates['from_date']),
                            to_date=str(from_to_dates['to_date']),
                            results_per_call=100)
    print("rule:", rule)

    search_args = {
        "bearer_token": access_token,
        "endpoint": os.environ['FULLARCHIVE_TWEETSEARCH_ENDPOINT']}

    """
        calling the twitter api
    """
    tweets_list = collect_results(rule,
                                  max_results=100,
                                  result_stream_args=search_args)
    appended_tweets = []
    """
        Iterating the twitter search response
    """
    for tweet in tweets_list:
        appended_tweets.append(str(tweet.created_at_datetime) + " " + tweet.text)

    json_response = {
        "Given Hashtag": hash_tag,
        "Given TwitterAccount": screen_name,
        "Tweet count": str(len(tweets_list)),
        "Tweet Text": appended_tweets
    }
    output = {'statusCode': 200, 'body': json.dumps(json_response)}
    return output
Example No. 24
def get_tweets(keyword,
               limit='100',
               begin_date=datetime.now().strftime('%Y-%m-%d'),
               end_date=datetime.now().strftime('%Y-%m-%d'),
               lang='id'):
    query = keyword + ' lang:' + lang

    rule = gen_rule_payload(query,
                            from_date=begin_date,
                            to_date=end_date,
                            results_per_call=500)

    tweets = collect_results(rule,
                             max_results=500,
                             result_stream_args=search_args)

    return [tweet.all_text for tweet in tweets]
Example No. 25
def auth(dates):
    premium_args = load_credentials(filename="credentials.yaml",
                                    yaml_key='search_tweets_api_dev',
                                    env_overwrite=False)
    # Change the below string to the candidate you're looking for info on. Don't remove the lang:en otherwise you'll
    # get results in any language
    queryString = 'Donald Trump lang:en'
    rule = gen_rule_payload(queryString,
                            results_per_call=100,
                            from_date=dates[0],
                            to_date=dates[1])
    print(rule)
    tweets = collect_results(rule,
                             max_results=100,
                             result_stream_args=premium_args)
    [print(tweet.all_text) for tweet in tweets]
    return tweets, queryString
Example No. 26
def collect_tweets(query, from_date, to_date, results_per_call, max_results,
                   premium_search_args):
    # query: rule passed to the Twitter API, e.g. query='bitcoin' to collect tweets about bitcoin
    # results_per_call is capped at 100 for a sandbox account; collect_results handles the
    # 'next' token internally, so e.g. max_results=500 works without any extra code
    # date format: 'YYYY-mm-DD HH:MM' strings, converted automatically by convert_utc_time,
    # e.g. '2019-09-09' -> '201909090000'
    # from_date is inclusive, to_date is non-inclusive; results are returned newest-first,
    # starting at to_date and working backwards towards from_date
    collect_rule = gen_rule_payload(pt_rule=query,
                                    results_per_call=results_per_call,
                                    from_date=from_date,
                                    to_date=to_date)
    print(collect_rule)
    collected_tweets = collect_results(collect_rule,
                                       max_results=max_results,
                                       result_stream_args=premium_search_args)
    return collected_tweets
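A minimal usage sketch for collect_tweets, assuming premium_search_args was obtained from load_credentials as in the other examples (the query and date window are placeholders):

# Hypothetical call; adjust the query, dates, and credentials to your setup.
tweets = collect_tweets(query='bitcoin lang:en',
                        from_date='2019-09-01',
                        to_date='2019-09-09',
                        results_per_call=100,
                        max_results=500,
                        premium_search_args=premium_search_args)
print(len(tweets), 'tweets collected')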
Example No. 27
def get_data(search_query, api_key, secret_key, to_date, from_date, filename):
    """Get Twitter data through the full-archive search sandbox API and write every tweet
    to a JSONL file, based on:
     - the search term,
     - the geographic location of interest,
     - the time period of interest,
     - and personal Twitter account credentials.

     Reference: https://github.com/geduldig/TwitterAPI/tree/master/TwitterAPI
     Reference: https://developer.twitter.com/en/docs/tweets/search/overview
    """
    print_after_x = 1000
    config = dict(
        search_tweets_api=dict(
            account_type='premium',
            endpoint="https://api.twitter.com/1.1/tweets/search/fullarchive/mangroveConservation.json",
            consumer_key=api_key,
            consumer_secret=secret_key
        )
    )
    with open('twitter_keys.yaml', 'w') as config_file:
        yaml.dump(config, config_file, default_flow_style=False)
    from searchtweets import load_credentials, gen_rule_payload, ResultStream

    premium_search_args = load_credentials("twitter_keys.yaml",
                                           yaml_key="search_tweets_api",
                                           env_overwrite=False)
    rule = gen_rule_payload(search_query,
                            results_per_call=100,
                            from_date=from_date,
                            to_date=to_date
                            )
    temp = ResultStream(rule_payload=rule,
                        max_results=100000,
                        **premium_search_args)
    with open(filename, 'a', encoding='utf-8') as temp_file:
        num = 0
        for tweet in temp.stream():
            num += 1
            if num % print_after_x == 0:
                print('{0}: {1}'.format(str(num), tweet['created_at']))
            json.dump(tweet, temp_file)
            temp_file.write('\n')
    print('done')
Example No. 28
 def createTestData(search_string):
     try:
         print('Start Fetching')
         #print(date,nextdate)
         rule = gen_rule_payload(search_string,
                                 from_date="2019-05-18",
                                 to_date="2019-05-20",
                                 results_per_call=500)

         alltweets = collect_results(rule,
                                     max_results=500,
                                     result_stream_args=premium_search_args)
         print("data fetched")

         return alltweets

     except Exception as exc:
         print("error:", exc)
def save_old_tweets():
    from searchtweets import load_credentials, gen_rule_payload, ResultStream
    import json

    premium_search_args = load_credentials("twitter_keys_fullarchive.yaml",
                                           yaml_key="search_tweets_api",
                                           env_overwrite=False)

    query = "from:NTOO_Org"
    rule = gen_rule_payload(query, results_per_call=100)

    rs = ResultStream(rule_payload=rule,
                      max_results=1000,
                      **premium_search_args)

    with open('fullTweetsData.json', 'a', encoding='utf-8') as f:
        for tweet in rs.stream():
            json.dump(tweet, f)
            f.write('\n')
def read_stream(apiscope, label):
    API_KEY = api_key
    API_SECRET_KEY = api_secret_key
    DEV_ENVIRONMENT_LABEL = label
    API_SCOPE = apiscope  # 'fullarchive' for full archive, '30day' for the last 31 days

    SEARCH_QUERY = 'delays, @WestMidRailway OR @NetworkRailBHM OR @networkrail'
    RESULTS_PER_CALL = 100  # 100 for sandbox, 500 for paid tiers
    TO_DATE = '2021-01-30'  # format YYYY-MM-DD HH:MM (hour and minutes optional)
    FROM_DATE = '2021-01-01'  # format YYYY-MM-DD HH:MM (hour and minutes optional)

    MAX_RESULTS = 10000  # Number of Tweets you want to collect

    # --------------------------- STOP -------------------------------#
    # Don't edit anything below, if you don't know what you are doing.
    # --------------------------- STOP -------------------------------#

    config = dict(search_tweets_api=dict(
        account_type='premium',
        endpoint=f"https://api.twitter.com/1.1/tweets/search/{API_SCOPE}/{DEV_ENVIRONMENT_LABEL}.json",
        consumer_key=API_KEY,
        consumer_secret=API_SECRET_KEY))

    with open('twitter_keys.yaml', 'w') as config_file:
        yaml.dump(config, config_file, default_flow_style=False)

    premium_search_args = load_credentials("twitter_keys.yaml",
                                           yaml_key="search_tweets_api",
                                           env_overwrite=False)

    rule = gen_rule_payload(SEARCH_QUERY,
                            results_per_call=RESULTS_PER_CALL,
                            from_date=FROM_DATE,
                            to_date=TO_DATE)

    rs = ResultStream(rule_payload=rule,
                      max_results=MAX_RESULTS,
                      **premium_search_args)

    return rs
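Since read_stream only builds and returns the ResultStream, a short hypothetical driver for it could look like this; api_key and api_secret_key are assumed to be defined at module level as the function expects, and the scope and label below are placeholders:

# Hypothetical consumer of read_stream(); writes each tweet as one JSON line.
import json

rs = read_stream('30day', 'my_dev_env')
with open('tweets.jsonl', 'a', encoding='utf-8') as out:
    for tweet in rs.stream():  # stream() lazily pages through the results
        json.dump(tweet, out)
        out.write('\n')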