Example #1
0
def ggl_trends(grouped, keyword):
    """Join monthly-averaged US Google Trends interest for ``keyword`` onto ``grouped``.

    Args:
        grouped: DataFrame to merge with; the join is index-on-index.
        keyword: Search term to query. Its column is renamed to
            'Google Trends' in the result.

    Returns:
        The inner join of ``grouped`` with the monthly means of every column
        returned by ``interest_over_time()``, or an empty DataFrame when
        Google Trends returns no data.
    """
    client = TrendReq(hl='en-US', tz=360)
    client.build_payload([keyword], cat=0, timeframe='all', geo='US', gprop='')
    interest = client.interest_over_time()
    if interest.empty:
        return pd.DataFrame()

    # Collapse the series into monthly bins before aligning with ``grouped``.
    monthly = interest.groupby(pd.Grouper(freq='1m')).mean()
    monthly = monthly.rename(columns={keyword: 'Google Trends'})
    return grouped.merge(monthly, left_index=True, right_index=True, how='inner')
def regions(l_args, s_ticker):
    """Plot a bar chart of the regions with the highest search interest in a ticker.

    Args:
        l_args: Command-line style argument list. ``-n/--num`` selects how
            many regions to plot (default 10).
        s_ticker: Ticker symbol, used as the Google Trends keyword.

    Returns:
        None. Any exception raised while parsing, querying or plotting is
        printed instead of propagated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="regions",
        description=
        """Plot bars of regions based on stock's interest. [Source: Google]""",
    )

    parser.add_argument(
        "-n",
        "--num",
        action="store",
        dest="n_num",
        type=check_positive,
        default=10,
        help="number of regions to plot that show highest interest.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        pytrend = TrendReq()
        pytrend.build_payload(kw_list=[s_ticker])
        df_interest_region = pytrend.interest_by_region()
        # Keep only the n regions with the highest interest.
        df_interest_region = df_interest_region.sort_values(
            [s_ticker], ascending=False).head(ns_parser.n_num)

        plt.figure(figsize=(25, 5))
        plt.title(f"Top's regions interest on {s_ticker}")
        plt.bar(df_interest_region.index,
                df_interest_region[s_ticker],
                width=0.8)
        # The on/off flag is passed positionally: its keyword was ``b`` before
        # Matplotlib 3.5 and ``visible`` afterwards, so only the positional
        # form works on both old and new releases.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.ylabel("Interest [%]")
        plt.xlabel("Region")
        plt.show()
        print("")

    except Exception as e:
        print(e)
        print("")
Example #3
0
 def fetch(self, keyword):
     """Rate how "hot" a keyword is on Google Trends over the last 365 days.

     Args:
         keyword: Search term to query, restricted to ``self.country``.

     Returns:
         dict with keys 'keyword', 'source', 'type', 'Nature' and 'value'.
         'Nature' is 'Hot' (> 70), 'High' (51-70) or 'Medium' (<= 50) based
         on the second-to-last sample; both 'Nature' and 'value' are
         'No Data' when fewer than two samples are returned.
     """
     import datetime

     pytrends = TrendReq(hl='en-US', tz=360)
     today = datetime.date.today()
     one_year_ago = today - datetime.timedelta(days=365)
     # isoformat() zero-pads month and day, giving 'YYYY-MM-DD YYYY-MM-DD'.
     timeframe = '{} {}'.format(one_year_ago.isoformat(), today.isoformat())
     pytrends.build_payload([keyword],
                            cat=0,
                            timeframe=timeframe,
                            geo=self.country,
                            gprop='')
     d = pytrends.interest_over_time()

     xc = {
         'keyword': keyword,
         'source': 'googletrends',
         'type': 'hotness',
         'Nature': 'No Data',
         'value': 'No Data'
     }
     # The second-to-last sample is read below, so at least two rows are
     # needed; an empty or single-row result is reported as 'No Data'.
     if len(d) < 2:
         return xc

     # .iloc makes the positional lookup explicit; plain [-2] on a
     # date-indexed Series relies on a deprecated fallback.
     # NOTE(review): presumably -2 skips a possibly partial last sample - confirm.
     t = int(d[keyword].iloc[-2])
     if t > 70:
         nature = 'Hot'
     elif t > 50:
         nature = 'High'
     else:
         nature = 'Medium'
     xc['Nature'] = nature
     xc['value'] = t
     return xc
Example #4
0
def trender(a, term, term_no):
    """Fetch all-time GB interest for a batch of terms and store it in ``result``.

    Args:
        a: Column offset into the module-level ``result`` array; the values of
            this batch are written to columns ``a + 1 .. a + len(term)``.
        term: List of search terms for one request.
        term_no: Identifier of the batch, printed when the batch is done.

    Side effects:
        Writes the timestamp of each row into column 0 of ``result`` and the
        stringified interest values into the columns described above.
    """
    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload(term, cat=0, timeframe='all', geo='GB', gprop='')
    interest = pytrends.interest_over_time()

    # Set the values at the correct indices in the 2D result array. Looping
    # over ``term`` handles any batch size instead of assuming four or five.
    for b, (index, row) in enumerate(interest.iterrows()):
        result[b][0] = str(index)
        for offset, name in enumerate(term, start=1):
            result[b][a + offset] = str(row[name])

    # Function-call form prints the same text on Python 2 and Python 3.
    print(term_no)
Example #5
0
def getTrendData(keyword, year=YEAR, month=MONTH):
    """Return Google Trends interest for ``keyword`` up to the end of a given month.

    The queried window runs from the first day of ``month`` in ``year - 1``
    to the last day of ``month`` in ``year``.

    Args:
        keyword: Search term to query.
        year: Year in which the window ends.
        month: Month in which the window ends.

    Returns:
        DataFrame of interest over time without the 'isPartial' column, or an
        empty DataFrame when Google Trends returns no data.
    """
    pytrends = TrendReq(hl='en-US', tz=360)
    end_date = date(year, month, getLastDayOfMonth(year, month))
    start_date = date(year - 1, month, 1)
    timeframe = start_date.strftime('%Y-%m-%d') + ' ' + end_date.strftime(
        '%Y-%m-%d')

    pytrends.build_payload(kw_list=[keyword], timeframe=timeframe)
    data = pytrends.interest_over_time()
    if data.empty:
        # Concatenating an empty list raises ValueError, so the "no data"
        # case is answered with an empty frame instead.
        return pd.DataFrame()

    return data.drop(labels=['isPartial'], axis='columns')
Example #6
0
def prediction(key_word):
    """Plot interest in ``key_word`` with a 2-sample moving average and save it.

    The query covers 2021-01-01 to 2021-01-15 with no geo restriction. The
    chart is written to 'template/static/images/prediction.png'.

    Args:
        key_word: Search term to query and plot.
    """
    trends_client = TrendReq(hl='en-US', tz=360)
    trends_client.build_payload(
        [key_word],
        cat=0,
        timeframe='2021-01-01 2021-01-15',
        gprop='',
        geo='',
    )
    interest = trends_client.interest_over_time()

    frame = pd.DataFrame.from_dict(interest)
    # Average of each sample with its predecessor; the first entry is NaN.
    frame['Moving Average'] = frame[key_word].rolling(2).mean()

    plotted_columns = [key_word, 'Moving Average']
    frame[plotted_columns].plot(figsize=(10, 4))
    plt.grid(True)
    plt.title(key_word + " Google Trends Moving Averages")
    plt.axis('tight')
    plt.ylabel('Searches')
    plt.savefig('template/static/images/prediction.png')
    plt.close()
Example #7
0
class GoogleTrendStatsEvaluator(StatsSocialEvaluator):
    """Social evaluator that derives its note from Google Trends interest in ``self.symbol``."""

    def __init__(self):
        super().__init__()
        self.is_threaded = False
        # Created in get_data(); None until the first fetch.
        self.pytrends = None

    def get_data(self):
        """Create the pytrends client and build the payload for ``self.symbol``.

        A ``ResponseError`` raised while building the payload is logged and
        swallowed.
        """
        # Uses the pytrends library (https://github.com/GeneralMills/pytrends),
        # see https://github.com/GeneralMills/pytrends/blob/master/examples/example.py
        self.pytrends = TrendReq(hl='en-US', tz=0)
        keywords = [self.symbol]
        try:
            # Only the 1 and 3 month windows appear to work.
            history_months = self.social_config[STATS_EVALUATOR_HISTORY_TIME]
            # Beware: the hourly request limit seems to be fairly low.
            self.pytrends.build_payload(
                kw_list=keywords,
                cat=0,
                timeframe="today " + str(history_months) + "-m",
                geo='',
                gprop='',
            )
        except ResponseError as e:
            self.logger.warn(str(e))

    def eval_impl(self):
        """Fetch interest over time and store the trend analysis result in ``eval_note``."""
        interest = self.pytrends.interest_over_time()

        # The original author describes this step as computing Bollinger bands;
        # the analysis itself is delegated to StatisticAnalysis.
        analyser = AdvancedManager.get_class(self.config, StatisticAnalysis)
        self.eval_note = analyser.analyse_recent_trend_changes(
            interest[self.symbol], numpy.sqrt)

    def run(self):
        """No-op."""
        pass

    def load_config(self):
        """Load the configuration, then cap the history length at the allowed maximum."""
        super().load_config()
        configured = self.social_config[STATS_EVALUATOR_HISTORY_TIME]
        if configured > STATS_EVALUATOR_MAX_HISTORY_TIME:
            self.social_config[STATS_EVALUATOR_HISTORY_TIME] = STATS_EVALUATOR_MAX_HISTORY_TIME

    def set_default_config(self):
        """Install the default refresh rate (3600) and history length (3)."""
        self.social_config = {
            CONFIG_REFRESH_RATE: 3600,
            STATS_EVALUATOR_HISTORY_TIME: 3,
        }
Example #8
0
    def return_graph(self):
        """Build a Dash graph of Google Trends interest for the selected title(s).

        When ``self.top_bool`` is set, the five names returned by
        ``get_top_anime_names(k, 'tv')`` for k = 0..4 are plotted; otherwise
        only the result of ``search_anime(self.anime_name)`` is plotted.

        Returns:
            A ``dcc.Graph`` with one scatter trace per keyword.
        """
        client = TrendReq(hl='en-US', tz=360)

        if self.top_bool:
            titles = [self.get_top_anime_names(rank, 'tv') for rank in range(5)]
        else:
            titles = [self.search_anime(self.anime_name)]

        # Every title is sent in its own single-keyword request.
        frames = {}
        for position, title in enumerate(titles):
            client.build_payload([title],
                                 timeframe='today ' + self.time_scale,
                                 geo='')
            frames[position] = client.interest_over_time()

        # Side-by-side concat adds the dict key as an outer column level;
        # drop that level and the 'isPartial' flag columns.
        trendframe = pd.concat(frames, axis=1)
        trendframe.columns = trendframe.columns.droplevel(0)
        trendframe = trendframe.drop('isPartial', axis=1)

        traces = [
            go.Scatter(x=trendframe.index,
                       y=trendframe[col],
                       name=col,
                       line=dict(color=self.graph_color))
            for col in trendframe.columns
        ]
        layout = dict(
            paper_bgcolor='#27293d',
            plot_bgcolor='rgba(0,0,0,0)',
            font=dict(color='white'),
            showlegend=True)

        return dcc.Graph(id=self.graph_id,
                         figure={'data': traces, 'layout': layout})
Example #9
0
def get_searches(key_word):
    """Plot interest in ``key_word`` from 2020-01-01 to 2021-01-15 and save it.

    Prints the first rows of the returned data and writes the line chart to
    'template/static/images/search.png'.

    Args:
        key_word: Search term to query and plot.
    """
    client = TrendReq(hl='en-US', tz=360)
    client.build_payload(
        [key_word],
        cat=0,
        timeframe='2020-01-01 2021-01-15',
        gprop='',
        geo='',
    )
    interest = client.interest_over_time()
    print(interest.head())

    sns.set()
    # Expose the index as a regular column so it can be used as the x axis.
    interest['timestamp'] = pd.to_datetime(interest.index)
    sns.lineplot(x=interest['timestamp'], y=interest[key_word])

    plt.title(f"Normalized Searches for {key_word}")
    plt.ylabel("Number of Searches")
    plt.xlabel("Date")
    plt.savefig("template/static/images/search.png")
    plt.close()
def get_trend_df_list(init_trend_list, time_frame):
    """Fetch US interest over time for many terms, one single-column frame per term.

    Args:
        init_trend_list: Search terms; they are split into groups by
            ``Functions.group_list_by_size(init_trend_list, 5)`` and each
            group is sent as one request.
        time_frame: pytrends timeframe string applied to every request.

    Returns:
        List of single-column DataFrames, one per term that returned data.
        Groups for which Google Trends returns nothing contribute no entries.
    """
    trend_list_list = Functions.group_list_by_size(init_trend_list, 5)
    py_trends = TrendReq(hl='en-US', tz=360, timeout=(10, 25))
    trends_df_list = []
    for trend_list in trend_list_list:
        py_trends.build_payload(trend_list,
                                cat=0,
                                timeframe=time_frame,
                                geo="US",
                                gprop="")

        trends_df = py_trends.interest_over_time()
        # An empty result has no 'isPartial' column; ignore the missing label
        # instead of failing the whole run with a KeyError.
        trends_df = trends_df.drop(columns=["isPartial"], errors="ignore")

        trends_df_list.extend(trends_df[[col]] for col in trends_df.columns)

    return trends_df_list
Example #11
0
def get_keywords(request):
    """Render the keyword form and, on a valid POST, the interest-over-time table.

    Args:
        request: Incoming HTTP request.

    Returns:
        The rendered 'index.html'. The context always holds 'form'; after a
        valid POST it also holds 'data', the interest table as HTML.
    """
    if request.method != 'POST':
        return render(request, 'index.html', {'form': KeyWords()})

    form = KeyWords(request.POST)
    if not form.is_valid():
        # Re-render with the bound form so validation errors can be shown.
        return render(request, 'index.html', {'form': form})

    # Keywords are submitted as one whitespace-separated string.
    keywords = form.cleaned_data['keywords'].split()
    pytrend = TrendReq()
    pytrend.build_payload(kw_list=keywords)
    interest_html = pytrend.interest_over_time().to_html()
    return render(request, 'index.html', {
        'data': interest_html,
        'form': form
    })
Example #12
0
def update_trends(proxies: List[str] = ["http://179.108.169.71:8080"],
                  topics=topics,
                  countries=countries) -> None:
    """Build a one-week worldwide Google Trends payload for every topic/country pair.

    Args:
        proxies: Proxy URLs. NOTE(review): not used anywhere in the visible
            body - confirm whether they should be passed to ``TrendReq``.
        topics: Iterable of topics; only the number of topics matters here,
            the topic value itself is not read.
        countries: Iterable of countries, each mapped to a language code via
            ``state_lang``.
    """
    for _topic in topics:
        for country in countries:
            # Language code; expected to match an entry of LANGUAGES.txt.
            language = state_lang(country)
            keywords = load_kws(kw_tmpl + language + ext)

            # A fresh client per country so ``hl`` follows the language.
            client = TrendReq(hl=language, tz=0)
            client.build_payload(keywords,
                                 cat=0,
                                 timeframe='today 1-w',
                                 geo='',
                                 gprop='')
def trends(topic):
    """Download month-by-month interest for ``topic`` and save it as CSV.

    One request is made per calendar month from January 2011 to September
    2018. The concatenated result is written to 'bitcoin.csv'; the number of
    rows and the last requested timeframe are printed.

    Args:
        topic: Search term to query.
    """
    month_starts = pd.date_range(start='1/1/2011', end='9/1/2018', freq='MS')
    # Last day of each month, derived from the starts so the two sequences
    # cannot drift apart.
    month_ends = month_starts + pd.offsets.MonthEnd(0)
    pytrends = TrendReq(hl='en-US', tz=0)
    kw_list = [topic]

    frames = []
    frame = None
    for month_start, month_end in zip(month_starts, month_ends):
        # Timeframe covering exactly one calendar month.
        frame = month_start.strftime('%Y-%m-%d') + " " + month_end.strftime(
            '%Y-%m-%d')
        pytrends.build_payload(kw_list, cat=0, timeframe=frame, geo='', gprop='')
        frames.append(pytrends.interest_over_time())

    # DataFrame.append was removed in pandas 2.0; concatenating once also
    # avoids copying the accumulated frame on every iteration.
    df = pd.concat(frames)
    print(len(df.index))
    print(frame)
    df.to_csv('bitcoin.csv', sep=',', encoding='utf-8')
Example #14
0
def _fetch_data(trendreq: TrendReq, kw_list: list[str], timeframe: str = 'today 3-m',
                cat: int = 0) -> pd.DataFrame:
    """Download Google Trends data with pytrends, retrying on ``ResponseError``.

    The payload is built up to four times (one initial try plus three
    retries), waiting 60, 65 and 70 seconds between tries.

    Args:
        trendreq: pytrends client used for the request.
        kw_list: Keywords to query.
        timeframe: pytrends timeframe string.
        cat: Google Trends category id.

    Returns:
        The result of ``trendreq.interest_over_time()``.

    Raises:
        ce.RateLimited: If every try to build the payload fails.
    """
    max_retries = 3
    attempts = 0
    while True:
        try:
            trendreq.build_payload(
                kw_list=kw_list, timeframe=timeframe, cat=cat, geo='', gprop='')
        except ResponseError as e:
            print(e)
            attempts += 1
            # Give up before sleeping, so the caller is not kept waiting for
            # another minute only to receive the exception.
            if attempts > max_retries:
                print(f'Failed after {attempts} attempts, abort fetching.')
                raise ce.RateLimited from e
            delay = 60 + 5 * (attempts - 1)
            print(f'Trying again in {delay} seconds.')
            sleep(delay)
        else:
            break
    return trendreq.interest_over_time()
Example #15
0
def GoogleTrendsSlopeCalculator(request):
    """Print the first rows of Google Trends interest for 'pizza' and 'bagel'.

    NOTE(review): ``connector``, ``path`` and ``keywords`` are created but not
    used by the visible code, and nothing is returned - confirm whether the
    remainder of this function is missing.

    Args:
        request: Incoming request; not read by the visible code.
    """
    # Placeholders: fill in the Gmail username and password.
    credentials = ("******", "******")
    connector = TrendReq(*credentials)

    # Directory in which downloaded Google Trends CSV files should be stored.
    path = ""

    # One-column CSV with the header "Keywords" (case sensitive).
    keywords = pd.read_csv("http://localhost:8000/static/keywords.csv")
    pytrend = TrendReq()

    # Build the payload first; interest_over_time() depends on it.
    pytrend.build_payload(kw_list=['pizza', 'bagel'])

    # Interest over time
    print(pytrend.interest_over_time().head())
async def get_trends(item: Item):
    """Compare the news-search interest for ``item.mensagem`` in Brazil at two fixed rows.

    The last month of data is requested, and the values at row positions 29
    ("hoje") and 28 ("ontem") are compared.

    Args:
        item: Request body; ``item.mensagem`` is the search term.

    Returns:
        dict with both values as strings and a sentence stating which one is
        larger. A tie is reported with the "menor" sentence.

    Raises:
        KeyError: If Google Trends returns no column for the term.
        IndexError: If fewer than 30 rows are returned.
    """
    pytrend = TrendReq(hl='pt-BR', tz=360)
    keywords = [item.mensagem]
    pytrend.build_payload(kw_list=keywords,
                          cat=0,
                          timeframe='today 1-m',
                          geo='BR',
                          gprop='news')
    data = pytrend.interest_over_time()
    series = data[item.mensagem]
    # .iloc makes the positional lookup explicit; plain series[29] on a
    # date-indexed Series relies on a deprecated fallback.
    valor_hoje = series.iloc[29]
    valor_ontem = series.iloc[28]
    if valor_hoje > valor_ontem:
        resultado = 'Os interesse de pesquisa de hoje foram maior que ontem'
    else:
        resultado = 'Os interesse de pesquisa de hoje foram menor que ontem'
    return {
        "interesse de pesquisa relativo hoje": str(valor_hoje),
        "interesse de pesquisa relativo ontem": str(valor_ontem),
        "variação": resultado
    }
Example #17
0
def PytrendJob():
    """Crawl the last seven days of Korean interest for two keywords into a dated CSV.

    The output file is named '<YYYY-MM-DD>.csv' after today's date. It holds
    the interest values of both keywords plus, for each, the difference to
    the previous row (0 for the first row).
    """
    pytrend = TrendReq(tz=540)

    # Read the clock once so both dates come from the same instant.
    now = datetime.now()
    todayDate = now.strftime('%Y-%m-%d')
    lastWeekDate = (now - timedelta(days=7)).strftime('%Y-%m-%d')

    period = lastWeekDate + ' ' + todayDate

    keywords = ['쿠팡', '11번가']
    # ``period`` used to be computed but never passed on, so the request
    # silently fell back to the default timeframe of build_payload.
    pytrend.build_payload(kw_list=keywords, timeframe=period, geo='KR')
    dataCrawling = pytrend.interest_over_time()

    csvFileName = todayDate + '.csv'

    # Copy the selection so the new columns are added to an independent frame.
    data = pd.DataFrame(dataCrawling)[keywords].copy()

    data['쿠팡증감률'] = data['쿠팡'].diff().fillna(0).astype(int)
    data['11번가증감률'] = data['11번가'].diff().fillna(0).astype(int)

    data.to_csv(csvFileName, index=True)
    print(todayDate + ": Google Trends Crawling ok…")
def explain_unemployment(rollingwindow=12):
    """Model unemployment series with and without Google Trends data and plot the results.

    Args:
        rollingwindow: Passed through to ``linearmodel`` for both fits.

    Returns:
        Tuple ``(basestatsdf, statsdf)``: the statistics returned by
        ``linearmodel`` without and with the Google Trends input.
    """
    # Map series ids (built from the state code) to state names, and the
    # nation-wide series id to 'US'.
    statecodes = pd.read_csv(_rootPath + _statecodes)
    series_ids = statecodes['Code'].apply(
        lambda x: 'LAUST' + str(x).zfill(2) + '0000000000003')
    statemap = dict(zip(series_ids, statecodes['State']))
    statemap[_nationcode] = 'US'

    # Inverse of _statesabr, used to turn state names into abbreviations.
    statesnamemap = {name: abbr for abbr, name in _statesabr.items()}

    actualdata = (
        pd.read_excel(_rootPath + _inputfile, 'BLS Data Series', header=3, index_col=0)
        .T
        .rename(columns=statemap)
        .rename(columns=statesnamemap)
        .drop(['PR', 'DC'], axis=1)  # Puerto Rico and DC are excluded
        .dropna(how='all')
    )

    # One all-time "unemployment" query per geography.
    pytrend = TrendReq()
    inputdata = {}
    for geo in actualdata.columns:
        geo_code = geo if geo == 'US' else 'US-' + geo
        pytrend.build_payload(kw_list=['unemployment'], timeframe='all', geo=geo_code)
        inputdata[geo] = pytrend.interest_over_time()['unemployment']
    # Lag by one row: values are stamped with the first of the month although
    # they should be aligned with its end.
    inputdata = pd.DataFrame(inputdata).shift().dropna(how='all')

    # Sample plot: actual rate against the Google Trends series for the US.
    pd.DataFrame({
        'Unemployment Rate': actualdata['US'],
        'Unemployment Google Trends': inputdata['US'],
    }).plot(secondary_y=['Unemployment Google Trends'])

    # Base model first, then the model including the Google Trends input.
    basestatsdf, basefitted = linearmodel(
        actualdata, inputdata, rollingwindow, usegoogle=False)
    statsdf, fitted = linearmodel(
        actualdata, inputdata, rollingwindow, usegoogle=True, stdize=True)

    statsdf.loc['Google_Tstat'].plot(kind='bar')
    plt.tight_layout()

    # Actual US rate against both fitted series.
    pd.DataFrame({
        'Unemployment Rate': actualdata['US'],
        'Fitted (Base Model)': basefitted['US'],
        'Fitted (Including Google)': fitted['US'],
    }).plot()

    return basestatsdf, statsdf
def trackerfunc(driver_list, verbose=1):
    '''
    Collect Google Trends interest-by-region for a list of F1 drivers.

    The drivers are queried in consecutive batches of at most five names.
    Batches that fail are reported on stdout and skipped.

    params
    ---------
    driver_list: list of drivers
    verbose: print a start message when greater than 1

    returns
    ---------
    DataFrame with the regional interest of all successful batches stacked
    row-wise plus a 'datetime' column set to the module-level DATE; an empty
    DataFrame when no batch succeeded or driver_list is empty.
    '''
    if verbose > 1:
        print('Starting trackerfunc')
    # Maximum number of keywords sent in one request.
    max_google_request = 5
    region_frames = []
    total_rows = 0
    # Stepping by the batch size covers every driver exactly once, including
    # lists shorter than, or not a multiple of, the batch size.
    for batch_start in range(0, len(driver_list), max_google_request):
        pytrend = TrendReq()
        iter_drivers = driver_list[batch_start:batch_start + max_google_request]
        # Pause before every request (presumably to avoid rate limiting).
        time.sleep(10)
        try:
            pytrend.build_payload(kw_list=iter_drivers)
            df_region = pytrend.interest_by_region()
        except Exception:
            print('ERROR', len(iter_drivers))
            continue
        region_frames.append(df_region)
        total_rows += len(df_region)
        print('full_results len', total_rows)
        print('COMPLETE:', iter_drivers)

    if not region_frames:
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    full_results_region = pd.concat(region_frames)
    full_results_region['datetime'] = DATE
    return full_results_region
Example #20
0
def getTrendData(keyword, timeframe=timeFrame):
    """
    Compute the change in US search interest between the start and the end
    of a timeframe.

    Parameters
    ----------
    keyword : str
        Keyword to get trend data for
    timeframe : str, optional
        pytrends timeframe string; defaults to the module-level ``timeFrame``

    Returns
    -------
    yoyIncrease : float
        Percentage change between the mean of the first four samples and the
        mean of the last four samples

    Raises
    ------
    ValueError
        If Google Trends returns no data for the keyword

    """
    pytrends = TrendReq(hl='en-US', tz=360)  # Create pytrend query

    # Use the caller's timeframe; previously the argument was ignored and the
    # module-level default was always sent.
    pytrends.build_payload(
        kw_list=[keyword],
        timeframe=timeframe,
        geo='US')  # US only, remove for global
    data = pytrends.interest_over_time()  # Pull data from query
    if data.empty:
        raise ValueError(
            'No Google Trends data for keyword: {!r}'.format(keyword))

    # After dropping the flag column only the keyword column is left.
    interest = data.drop(labels=['isPartial'], axis='columns').iloc[:, 0]

    # The original author treats the first four samples as last year's data
    # and the last four samples as this year's.
    lastYear = interest.head(4).mean()
    thisYear = interest.tail(4).mean()

    yoyIncrease = ((thisYear - lastYear) / lastYear) * 100

    return yoyIncrease
Example #21
0
def start(search_words, start_date):
    """Download historical interest from ``start_date`` until today and save it as CSV.

    Args:
        search_words: Search term; sent as a single keyword.
        start_date: Date string with the year in characters 0-3, the month in
            5-6 and the day in 8-9 (for example 'YYYY-MM-DD').

    Side effects:
        Writes 'google_results.csv' and prints progress information plus the
        first ten rows of the result.
    """
    print("... google module started")
    now = datetime.datetime.now()

    # Parameters for the Google search
    kw_list = [search_words]
    year_start, month_start, day_start = (
        int(start_date[:4]),
        int(start_date[5:7]),
        int(start_date[8:10]),
    )
    print(kw_list, " ", year_start, " ", month_start, " ", day_start)

    # The search window ends today.
    year_end, month_end, day_end = now.year, now.month, now.day
    print(year_end, " ", month_end, " ", day_end)

    pytrend = TrendReq()
    pytrend.build_payload(kw_list)

    # Both ends of the window use hour 0.
    search_results = pytrend.get_historical_interest(
        kw_list,
        year_start, month_start, day_start, 0,
        year_end, month_end, day_end, 0,
        cat=0,
        geo='',
        gprop='',
        sleep=0)

    # Save into file
    search_results.to_csv('google_results.csv')

    # Print the first 10 datapoints
    print(search_results.head(10))
Example #22
0
def calculate_interest_over_time(request_user, account_user, niches, network):
    """Return a thinned-out interest-over-time series for an account's niches.

    Args:
        request_user: User making the request; decides whether the account's
            username is added as an extra keyword.
        account_user: Account whose niches are analysed.
        niches: Raw niches, converted by ``_build_niche_str_arr`` and limited
            to the first four entries.
        network: Network used to look up the verified account.

    Returns:
        Tuple ``(x_axis, cols)``. ``x_axis`` lists the ``.value`` of each kept
        row index, and ``cols`` maps every column name to the values of the
        kept rows. Returns ``(None, None)`` when the Google Trends request
        fails.
    """
    verified_acc = VerifiedUserAccounts.objects.filter(
        network=network, account_id=account_user.id).first()
    niches = _build_niche_str_arr(niches, verified_acc, network)[:4]

    # NOTE(review): ``is_authenticated`` is called as a method here; newer
    # Django versions expose it as an attribute - confirm the target version.
    if request_user.is_authenticated() and \
      (request_user.is_superuser or is_assistant(request_user) or \
      verified_acc in request_user.opened_accounts.all()):
        niches.insert(0, account_user.username)

    try:
        # NOTE(review): credentials are hard-coded in source; they should be
        # moved to configuration.
        pytrend = TrendReq('*****@*****.**', 'shoutourbiz123', \
         hl='en-US', tz=360, custom_useragent=None)
        pytrend.build_payload(kw_list=niches)
        df = pytrend.interest_over_time()
    except Exception:
        return (None, None)

    # One value list per returned column.
    cols = {header: [] for header in df.dtypes.index}
    x_axis = []

    for ndx, (index, row) in enumerate(df.iterrows()):
        # Thin out the series: keep only rows whose position is not divisible
        # by 2, 3, 5 or 7 (divisibility by 4 is already covered by 2).
        if ndx % 2 == 0 or ndx % 3 == 0 or ndx % 5 == 0 or ndx % 7 == 0:
            continue

        if index.value not in x_axis:
            x_axis.append(index.value)

        # Plain iteration over the dict works on Python 2 and 3, unlike
        # iteritems().
        for key in cols:
            cols[key].append(row[key])

    return (x_axis, cols)
    def pytrends_pull(self, query: list, query_loc: str, start_yr: int,
                      start_mo: int, end_yr: int, end_mo: int, type: str):
        """
        Fetch Google Trends interest for ``query`` at the requested granularity.

        Args:
            query: Keywords to query.
            query_loc: Key into ``self.us_states`` (case-insensitive) whose
                'abbr' entry is used as geo code.
            start_yr, start_mo: First month of the period.
            end_yr, end_mo: Last month of the period.
            type: 'hour', 'day' or 'week'.
                'hour' uses ``get_historical_interest`` for the period and geo.
                'day' queries the period from the first day of the start
                month to the last day of the end month.
                'week' queries with the default timeframe of ``build_payload``.

        Returns:
            The DataFrame returned by pytrends for the chosen granularity.

        Raises:
            ValueError: If ``type`` is not one of the supported values.
            KeyError: If ``query_loc`` is not present in ``self.us_states``.
        """
        # Fail early with a clear message instead of reaching the end of the
        # function without a result.
        if type not in ('hour', 'day', 'week'):
            raise ValueError(
                "type must be 'hour', 'day' or 'week', got {!r}".format(type))

        # NOTE(review): the geo code is only passed on for type == 'hour';
        # 'day' and 'week' queries are sent without geo - confirm intent.
        geocode = self.us_states[query_loc.lower()]['abbr']

        # Create the connection to trends.google.com
        pytrend = TrendReq(timeout=(10, 25))

        last_day = calendar.monthrange(end_yr, end_mo)[1]
        startdate = datetime.datetime(year=start_yr, month=start_mo, day=1)
        enddate = datetime.datetime(year=end_yr,
                                    month=end_mo,
                                    day=last_day,
                                    hour=23)

        if type == 'hour':
            return pytrend.get_historical_interest(
                keywords=query,
                cat=0,
                geo=geocode,
                year_start=start_yr,
                month_start=start_mo,
                day_start=1,
                hour_start=0,
                year_end=end_yr,
                month_end=end_mo,
                day_end=last_day,
                hour_end=0)

        if type == 'day':
            timeframe = '{} {}'.format(startdate.strftime("%Y-%m-%d"),
                                       enddate.strftime("%Y-%m-%d"))
            pytrend.build_payload(kw_list=query, timeframe=timeframe)
            return pytrend.interest_over_time()

        # type == 'week'
        pytrend.build_payload(kw_list=query)
        return pytrend.interest_over_time()
Example #24
0
def gtrend_getvalue(kw_list,output_file,timeframe):
    """
    Fetch data from Google Trends (Japan) with pytrends and export each result.

    Related queries, related topics, interest by region, interest over time
    and keyword suggestions are fetched in that order. Each result is
    exported under a name made of the first keyword plus a suffix.
    Any exception is caught and its traceback printed.
    pytrends reference: https://pypi.org/project/pytrends/#interest-by-region
    """
    try:
        prefix = kw_list[0]
        client = TrendReq(hl='ja-JP', tz=360)
        client.build_payload(kw_list, cat=0, timeframe=timeframe, geo='JP', gprop='')

        # Related queries
        exportdata(client.related_queries(), output_file, prefix + 'query', 1)

        # Related topics
        exportdata(client.related_topics(), output_file, prefix + 'topic', 1)

        # Interest by region
        by_region = client.interest_by_region(
            resolution='REGION', inc_low_vol=True, inc_geo_code=False)
        exportdata(by_region, output_file, prefix + 'region', 0)

        # Interest over time
        exportdata(client.interest_over_time(), output_file, prefix + 'overtime', 0)

        # Suggestions for the first keyword
        suggest_to_excel(client.suggestions(prefix), output_file, prefix + 'suggestions')

        # Trending searches (client.trending_searches with pn='united_states'
        # or pn='japan') are intentionally not exported.

    except Exception as e:
        print(traceback.format_exception(*sys.exc_info()))
        print(traceback.format_tb(e.__traceback__))
Example #25
0
def google_index(word="python",
                 start_date="2019-12-01",
                 end_date="2019-12-04",
                 plot=True):
    """
    Return the Google Trends interest for ``word`` within the given date range.

    Args:
        word: Search term to query.
        start_date: First day of the range.
        end_date: Last day of the range.
        plot: When true, also show a line plot of the series.

    Returns:
        The interest column for ``word``.
    """
    client = TrendReq(hl="en-US", tz=360)
    client.build_payload([word],
                         cat=0,
                         timeframe=start_date + " " + end_date,
                         geo="",
                         gprop="")
    interest = client.interest_over_time()[word]
    if plot:
        interest.plot()
        plt.legend()
        plt.show()
    return interest
Example #26
0
def get_search_interest_over_time(keyword_list,
                                  country_iso2,
                                  timeframe='today 3-m'):
    """Return the average interest of the first result column over the timeframe.

    Args:
        keyword_list: Keywords to query.
        country_iso2: Geo code passed to Google Trends.
        timeframe: pytrends timeframe string.

    Returns:
        Sum of the first column divided by the number of rows, or 0 when no
        rows are returned.
    """
    from pytrends.request import TrendReq

    client = TrendReq()
    client.build_payload(keyword_list,
                         cat=0,
                         timeframe=timeframe,
                         geo=country_iso2,
                         gprop='')
    scores = client.interest_over_time().to_numpy()

    # Column-wise sums; only the first column is used.
    total = scores.sum(axis=0)[0] if scores.size > 0 else 0

    # Number of score points available.
    row_count = scores.shape[0]
    if row_count == 0:
        return 0
    return total / row_count
Example #27
0
def daily_google_interests(currData):
    """Rescale the last day's interest in "dolar" (Turkey) onto the range of ``currData``.

    Each interest value is mapped linearly from the range of the returned
    interest values to ``[min(currData), max(currData)]``.

    Args:
        currData: Non-empty sequence of numbers defining the target range.

    Returns:
        List of rescaled values, one per returned sample. The list is empty
        when Google Trends returns no data. When every sample has the same
        value there is no spread to map, so every entry equals
        ``min(currData)``.
    """
    pytrends = TrendReq(hl='tr-TR', tz=360)
    pytrends.build_payload(
        kw_list=["dolar"],
        cat=0,
        timeframe='now 1-d',
        geo='TR',
        gprop='')
    data = pytrends.interest_over_time()
    if data.empty:
        # No 'dolar' column exists in an empty result.
        return []

    data_list = [float(value) for value in data['dolar']]
    old_min, old_max = min(data_list), max(data_list)
    new_min, new_max = min(currData), max(currData)

    old_span = old_max - old_min
    if old_span == 0:
        # A flat series would otherwise divide by zero.
        ratios = [0.0] * len(data_list)
    else:
        ratios = [(item - old_min) / old_span for item in data_list]

    return [ratio * (new_max - new_min) + new_min for ratio in ratios]
def build_my_payload(qlist, timeframe, pytrendobj=None):
  """Build a pytrends payload, retrying with exponential random backoff.

  Args:
    qlist: Keywords to query.
    timeframe: pytrends timeframe string.
    pytrendobj: Existing TrendReq object to reuse; when falsy a new one is
      created with a random custom user agent.

  Returns:
    The TrendReq object with the payload built, or None after five failed
    tries.
  """
  ntries=5
  for i in range(ntries):
    try:
      print('Try number %d: Building payload with qlist "%s" and timeframe "%s"' % (i+1, qlist, timeframe))
      if not pytrendobj:
        custom_useragent = random_word(8)
        print('  Building TrendReq() object from scratch with custom_useragent %s' % custom_useragent)
        # Pass the user agent that was just logged instead of generating a
        # second, different one.
        pytrendobj = TrendReq(google_username, google_password, custom_useragent=custom_useragent)
      # wait some time to keep it from getting blocked
      sleeptime = (2**i)*random.randint(1,10) # exponential random backoff
      print('Sleeping %d seconds exponential random backoff to avoid getting blocked' % sleeptime)
      time.sleep(sleeptime)
      print('  Building payload with qlist "%s" and timeframe "%s"' % (qlist, timeframe))
      pytrendobj.build_payload(kw_list=qlist, timeframe=timeframe)
      return pytrendobj
    except Exception:
      # Only ordinary errors are retried; KeyboardInterrupt and SystemExit
      # propagate so the long sleeps can be interrupted.
      print(traceback.format_exc())
      print("     Failed to build payload, probably couldn't get token, trying again...")
  print("         Failed to build payload after %d tries, giving up" % ntries)
  return None
Example #29
0
def get_searches(key_word, state):
    """Add a line plot of interest in ``key_word`` for one US state to the current figure.

    The query covers 2020-02-01 to 2020-03-10. The figure is neither shown
    nor saved here.

    Args:
        key_word: Search term to query and plot.
        state: US state code, appended to 'US-' to form the geo code.
    """
    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload([key_word],
                           cat=0,
                           timeframe='2020-02-01 2020-03-10',
                           gprop='',
                           geo='US-{}'.format(state))
    df = pytrends.interest_over_time()

    print(df.head())

    sns.set()
    df['timestamp'] = pd.to_datetime(df.index)
    # x and y must be passed by keyword: seaborn 0.12+ rejects them as
    # positional arguments.
    sns.lineplot(x=df['timestamp'], y=df[key_word])

    # The title is fixed text; it contains no placeholders to format.
    plt.title(
        "Normalized Searches for Coronavirus in NY (blue), MA (orange), and CA (green)"
    )
    plt.ylabel("Number of Searches")
    plt.xlabel("Date")
    plt.xticks(rotation=45)
Example #30
0
class DataLoader:
    """Loads the full price history of a ticker and plots Google Trends data."""

    def __init__(self, key):
        """Download the maximum available history for ``key`` and prepare it."""
        self.stock = yfinance.Ticker(key)
        self.hist = self.stock.history(period="max")
        self.feature_selection()
        self.pytrend = TrendReq()

    def printhist(self):
        """Print the price history."""
        print(self.hist)

    def gtrends(self):
        """Plot all-time Google Trends interest for the first of two fixed keywords."""
        keywords = ["china", "trump"]
        self.pytrend.build_payload(kw_list=keywords, timeframe='all')
        # Turn the index into a regular column so seaborn can use it as x.
        trend_frame = self.pytrend.interest_over_time().reset_index(col_fill="date")
        sns.lineplot(x="date", y=keywords[0], data=trend_frame)
        plt.show()

    def feature_selection(self):
        """Drop the 'Dividends' and 'Stock Splits' columns from the history in place."""
        self.hist.drop(columns=['Dividends', 'Stock Splits'], inplace=True)
Example #31
0
from pytrends.request import TrendReq


# Demo script: create one TrendReq client, build a payload for two keywords,
# then call each query method in turn and print a preview of what it returns.

# Create the client once; every request below reuses this same object.
pytrend = TrendReq()

# Create payload and capture API tokens. Only needed for interest_over_time(), interest_by_region() & related_queries()
pytrend.build_payload(kw_list=['pizza', 'bagel'])

# Interest Over Time
interest_over_time_df = pytrend.interest_over_time()
print(interest_over_time_df.head())

# Interest by Region
interest_by_region_df = pytrend.interest_by_region()
print(interest_by_region_df.head())

# Related Queries, returns a dictionary of dataframes
related_queries_dict = pytrend.related_queries()
print(related_queries_dict)

# Get Google Hot Trends data
trending_searches_df = pytrend.trending_searches()
print(trending_searches_df.head())

# Get Google Top Charts
# NOTE(review): the cid/date keyword form presumably matches the pytrends
# release this example was written for - confirm against the installed version.
top_charts_df = pytrend.top_charts(cid='actors', date=201611)
print(top_charts_df.head())

# Get Google Keyword Suggestions (the result is stored but not printed here)
suggestions_dict = pytrend.suggestions(keyword='pizza')
Example #32
0
 def test_top_charts(self):
     # A fresh client with a payload should return some top-charts result.
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     charts = client.top_charts(cid='actors', date=201611)
     self.assertIsNotNone(charts)
Example #33
0
## FIRST RUN ##
# NOTE(review): step, kw_list, start_date and overlap are not defined in this
# excerpt - presumably they are set earlier in the original script; confirm.

# Create the client once; the requests below reuse this same object.
pytrend = TrendReq()

# First request: the window ends today. (To end on another day, an explicit
# end date would be needed instead of today.)
today = datetime.today().date()
old_date = today

# Go back in time by `step` days to get the start of the first window
new_date = today - timedelta(days=step)

# Create the timeframe string 'YYYY-MM-DD YYYY-MM-DD' (start, then end) for which we download data
timeframe = new_date.strftime('%Y-%m-%d')+' '+old_date.strftime('%Y-%m-%d')
pytrend.build_payload(kw_list=kw_list, timeframe = timeframe)
interest_over_time_df = pytrend.interest_over_time()

## RUN ITERATIONS
# NOTE(review): the rest of the loop body is missing from this excerpt. As
# shown, new_date is never updated inside the loop, so the part that steps
# further into the past must come from the original source.

while new_date>start_date:
    
    ### Save the new date from the previous iteration.
    # Overlap == 1 would mean that we start where we
    # stopped on the iteration before, which gives us
    # indeed overlap == 1.
    old_date = new_date + timedelta(days=overlap-1)
    
    ### Update the new date to take a step into the past
    # Since the timeframe that we can apply for daily data
    # is limited, we use step = maxstep - overlap instead of
Example #34
0
 def test_build_payload(self):
     """Should return the widgets to get data"""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     # Building the payload is expected to leave a token payload on the client.
     payload = client.token_payload
     self.assertIsNotNone(payload)
Example #35
0
 def test_interest_by_region(self):
     # After a payload is built, the per-region query must return something.
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     by_region = client.interest_by_region()
     self.assertIsNotNone(by_region)
Example #36
0
 def test_trending_searches(self):
     # Trending searches for pn='p1' must return something.
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     trending = client.trending_searches(pn='p1')
     self.assertIsNotNone(trending)
Example #37
0
 def test_related_queries(self):
     # After a payload is built, the related-queries call must return something.
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     related = client.related_queries()
     self.assertIsNotNone(related)
Example #38
0
 def get_google_trends(self, kw_list, trdays=250, overlap=100, 
                       cat=0, geo='', tz=360, gprop='', hl='en-US',
                       sleeptime=1, isPartial_col=False, 
                       from_start=False, scale_cols=True):
     """Retrieve daily google trends data for a list of search terms
     
     The period between self.from_date and self.to_date (both 'YYYY-MM-DD'
     strings) is split into overlapping windows of `trdays` days. Each window
     is requested separately and rescaled onto the data collected so far,
     using the mean ratio of the values on the dates both have in common.
     
     Parameters
     ----------
     kw_list : list of search terms (1 to 5) - see pyTrends for more details
     trdays : the number of days to pull data for in a search
         (the max is around 270, though the website seems to indicate 90)
     overlap : the number of overlapped days when stitching two searches together
         (must be smaller than trdays)
     cat : category to narrow results - see pyTrends for more details
     geo : two letter country abbreviation (e.g 'US', 'GB') 
         default is '', which returns global results - see pyTrends for more details
     tz : timezone offset
         (default is 360, which corresponds to US CST - see pyTrends for more details)
     gprop : filter results to specific google property
         available options are 'images', 'news', 'youtube' or 'froogle'
         default is '', which refers to web searches - see pyTrends for more details
     hl : language (e.g. 'en-US' (default), 'es') - see pyTrends for more details
     sleeptime : when stitching multiple searches, the number of seconds to wait
         before each follow-up request
     isPartial_col : keep the isPartial column in the output
         (default is False i.e. the column is removed)
     from_start : when stitching multiple results, this determines whether searches
         are combined going forward or backwards in time
         (default is False, meaning searches are stitched with the most recent first)
     scale_cols : google trend searches traditionally returns scores between 0 and 100
         stitching could produce values greater than 100
         by setting this to True (default), the values will range between 0 and 100
     
     Returns
     -------
     pandas Dataframe
         On success: a 'date' column, optionally 'isPartial', and one column per
         keyword, sorted by date according to self.ascending.
         On failure (a request raised, or a window returned no rows): a
         single-row frame with one 'error' column holding the exception or message.
     
     Raises
     ------
     ValueError
         If kw_list is empty or has more than 5 entries, if trdays exceeds 270,
         or if overlap is not smaller than trdays.
     
     Notes
     -----
     This method is essentially a highly restricted wrapper for the pytrends package
     Any issues/questions related to its use would probably be more likely resolved
     by consulting the pytrends github page
     https://github.com/GeneralMills/pytrends
     """
     
     if len(kw_list)==0:
         raise ValueError("The keyword list must contain at least one word")
     if len(kw_list)>5:
         raise ValueError("The keyword list can contain at most 5 words")
     if trdays>270:
         raise ValueError("trdays must not exceed 270")
     if overlap>=trdays:
         raise ValueError("Overlap can't exceed search days")
     # distance (in days) between the starts of two consecutive windows
     stich_overlap = trdays - overlap
     from_date = datetime.datetime.strptime(self.from_date, '%Y-%m-%d')
     to_date = datetime.datetime.strptime(self.to_date, '%Y-%m-%d')
     n_days = (to_date - from_date).days
     # launch pytrends request
     _pytrends = TrendReq(hl=hl, tz=tz)
     # get the dates for each search
     if n_days <= trdays:
         trend_dates = [' '.join([self.from_date, self.to_date])]
     else:
         # windows are generated walking backwards from to_date
         trend_dates = ['{} {}'.format(
             (to_date - datetime.timedelta(i+trdays)).strftime("%Y-%m-%d"),
             (to_date - datetime.timedelta(i)).strftime("%Y-%m-%d")) 
                        for i in range(0,n_days-trdays+stich_overlap,
                                       stich_overlap)]
     if from_start:
         trend_dates = trend_dates[::-1]
     no_data_msg = 'search term returned no results (insufficient data)'
     try:
         _pytrends.build_payload(kw_list, cat=cat, timeframe=trend_dates[0], 
                                geo=geo, gprop=gprop)
     except Exception as e:
         return pd.DataFrame({"error":e}, index=[0])
     output = _pytrends.interest_over_time().reset_index()
     if len(output)==0:
         return pd.DataFrame({"error":no_data_msg}, index=[0])
     for date in trend_dates[1:]:
         time.sleep(sleeptime)
         try:
             _pytrends.build_payload(kw_list, cat=cat, timeframe=date, 
                                      geo=geo, gprop=gprop)
         except Exception as e:
             return pd.DataFrame({"error":e}, index=[0])
         temp_trend = _pytrends.interest_over_time().reset_index()
         if len(temp_trend)==0:
             # an empty window has no 'date' column, so the merge below would
             # fail; report it the same way as an empty first window
             return pd.DataFrame({"error":no_data_msg}, index=[0])
         temp_trend = temp_trend.merge(output, on="date", how="left")
         # it's ugly but we'll exploit the common column names
         # and then rename the underscore containing column names
         # ('_x' is the new window, '_y' is the data stitched so far)
         for kw in kw_list:
             norm_factor = np.ma.masked_invalid(temp_trend[kw+'_y']/temp_trend[kw+'_x']).mean()
             temp_trend[kw] = temp_trend[kw+'_x'] * norm_factor
         # keep only the rows with a missing value, i.e. the dates that had no
         # match in the data stitched so far; copy so the assignment below
         # writes to an independent frame rather than a slice
         temp_trend = temp_trend[temp_trend.isnull().any(axis=1)].copy()
         temp_trend['isPartial'] = temp_trend['isPartial_x']
         output = pd.concat([output, temp_trend[['date', 'isPartial'] + kw_list]], axis=0)
     
     # fix the column order: date, isPartial, then the keywords as given
     output = output[['date', 'isPartial']+kw_list]
     
     if not isPartial_col:
         output = output.drop('isPartial', axis=1)
     output = output[output['date']>=self.from_date].copy()
     if scale_cols and len(output)>0:
         # the values in each column are relative to other columns
         # so we need to get the maximum value across the search columns
         # (NaN entries are ignored when looking for the maximum)
         max_val = float(np.nanmax(output[kw_list].values))
         # a zero or NaN maximum would turn every value into NaN/inf,
         # so the columns are left untouched in that case
         if max_val>0:
             for col in kw_list:
                 output[col] = 100.0*output[col]/max_val
     output = output.sort_values('date', ascending=self.ascending).reset_index(drop=True)
     return output
Example #39
0
 def test_suggestions(self):
     # Keyword suggestions for 'pizza' must return something.
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     suggested = client.suggestions(keyword='pizza')
     self.assertIsNotNone(suggested)
Example #40
-1
def get_trends_over_time(keywords, timeframe, cat=0, geo='GB', gprop=''):
    """Query interest over time for *keywords* and return the resulting frame.

    A new TrendReq client is created on every call. The function announces
    the attempt on stdout, runs the query, then waits five seconds before
    handing the result back to the caller.
    """
    print("Attempt made")

    client = TrendReq(hl='en-US', tz=-60)

    # Gather the request filters once and hand them over in a single call.
    payload_options = {
        'cat': cat,
        'timeframe': timeframe,
        'geo': geo,
        'gprop': gprop,
    }
    client.build_payload(keywords, **payload_options)

    interest = client.interest_over_time()

    # Pause after the request before returning to the caller.
    time.sleep(5)
    return interest