Beispiel #1
0
def ggl_trends(grouped, keyword):
    """Merge monthly-averaged Google Trends interest for *keyword* into *grouped*.

    Returns an empty DataFrame when Google Trends has no data for the
    keyword; otherwise returns *grouped* inner-joined on the index with a
    'Google Trends' column holding the monthly mean interest.
    """
    trend_client = TrendReq(hl='en-US', tz=360)
    trend_client.build_payload([keyword], cat=0, timeframe='all', geo='US', gprop='')
    interest = trend_client.interest_over_time()
    if interest.empty:
        return pd.DataFrame()
    # Average the raw series into calendar-month buckets, then label the column.
    monthly = interest.groupby(pd.Grouper(freq='1m')).mean()
    monthly = monthly.rename(columns={keyword: 'Google Trends'})
    return grouped.merge(monthly, left_index=True, right_index=True, how='inner')
class GoogleTrendStatsEvaluator(StatsSocialEvaluator):
    """Social stats evaluator backed by Google Trends (via pytrends).

    Fetches interest-over-time data for ``self.symbol`` and scores recent
    trend changes with the configured statistic-analysis class.
    """

    def __init__(self):
        super().__init__()
        self.pytrends = None  # TrendReq session, created in get_data()
        self.is_threaded = False

    # Use pytrends lib (https://github.com/GeneralMills/pytrends)
    # https://github.com/GeneralMills/pytrends/blob/master/examples/example.py
    def get_data(self):
        """Create a pytrends session and build the payload for this symbol."""
        self.pytrends = TrendReq(hl='en-US', tz=0)
        key_words = [self.symbol]
        try:
            # Google Trends only appears to honour 1- and 3-month windows here.
            time_frame = "today " + str(self.social_config[STATS_EVALUATOR_HISTORY_TIME]) + "-m"
            # NOTE: the per-hour request quota is apparently quite low.
            self.pytrends.build_payload(kw_list=key_words, cat=0, timeframe=time_frame, geo='', gprop='')
        except ResponseError as e:
            # Best effort: log the Google Trends error and continue.
            # (Logger.warn is a deprecated alias; warning() is canonical.)
            self.logger.warning(str(e))

    def eval_impl(self):
        """Score recent changes in search interest for the symbol."""
        interest_over_time_df = self.pytrends.interest_over_time()

        # Analyse recent trend changes (sqrt weighting) to produce the note.
        self.eval_note = AdvancedManager.get_class(self.config, StatisticAnalysis).analyse_recent_trend_changes(
            interest_over_time_df[self.symbol], numpy.sqrt)

    def run(self):
        # Not threaded (is_threaded is False): nothing to run in background.
        pass

    def load_config(self):
        """Load config, clamping the history window to the allowed maximum."""
        super().load_config()
        if self.social_config[STATS_EVALUATOR_HISTORY_TIME] > STATS_EVALUATOR_MAX_HISTORY_TIME:
            self.social_config[STATS_EVALUATOR_HISTORY_TIME] = STATS_EVALUATOR_MAX_HISTORY_TIME

    def set_default_config(self):
        """Default: hourly refresh, 3-month history window."""
        self.social_config = {
            CONFIG_REFRESH_RATE: 3600,
            STATS_EVALUATOR_HISTORY_TIME: 3
        }
 def get_data(self):
     """Create a pytrends session and queue a payload for ``self.symbol``.

     NOTE(review): appears to duplicate GoogleTrendStatsEvaluator.get_data
     above — confirm which copy is actually used.
     """
     self.pytrends = TrendReq(hl='en-US', tz=0)
     # self.pytrends.GENERAL_URL = "https://trends.google.com/trends/explore"
     # self.symbol
     key_words = [self.symbol]
     try:
         # Looks like only 1- and 3-month windows work for this timeframe form.
         time_frame = "today " + str(self.social_config[STATS_EVALUATOR_HISTORY_TIME]) + "-m"
         # Beware: the per-hour request quota is apparently quite low.
         self.pytrends.build_payload(kw_list=key_words, cat=0, timeframe=time_frame, geo='', gprop='')
     except ResponseError as e:
         # Best effort: log the Google Trends error and continue.
         self.logger.warn(str(e))
import pandas as pd
import time
from pytrends.request import TrendReq
startTime = time.time()  # wall-clock start, for simple timing

# Single pytrends session reused for every request below.
pytrends = TrendReq(hl='en-US', tz=360)

# Load the keyword list; the CSV has no header row, so name the column here.
colnames = ["keywords"]
df = pd.read_csv("keyword_list.csv", names=colnames)
df2 = df["keywords"].values.tolist()
# Drop the literal "Keywords" header value that was read in as data.
df2.remove("Keywords")

dataset = []  # data set for this year's data
datasetLY = []  # data set for last year's data

# One request per keyword: September 2020 interest by US region.
for x in range(0, len(df2)):
    keywords = [df2[x]]
    pytrends.build_payload(kw_list=keywords,
                           cat=0,
                           timeframe='2020-09-01 2020-09-30',
                           geo='US')
    data = pytrends.interest_by_region(resolution='COUNTRY',
                                       inc_low_vol=False,
                                       inc_geo_code=False)
    if not data.empty:
        # data = data.drop(labels = ['isPartial'], axis ='columns')
        dataset.append(data)
for x in range(0, len(df2)):
    keywords = [df2[x]]
    pytrends.build_payload(kw_list=keywords,
Beispiel #5
0
 def test_ispartial_dtype_timeframe_all(self):
     """The isPartial column must be boolean-typed when timeframe='all'."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'], timeframe='all')
     frame = client.interest_over_time()
     assert ptypes.is_bool_dtype(frame.isPartial)
Beispiel #6
0
from opinionytics.all_views.signup_view import signup_view
from opinionytics.all_views.text_results_view import text_results_view
from opinionytics.all_views.url_results_view import url_results_view
from opinionytics.all_views.data_results_view import data_results_view

# Connect to the Aylien text-analysis API.
client = textapi.Client(TEXT_API_ID, TEXT_API_KEY)

# Connect to the NLP library (Natural Language Understanding client).
natural_language_understanding = NaturalLanguageUnderstandingV1(
    username=NLP_API_USERNAME,
    password=NLP_API_PASSWORD,
    version=NLP_API_VERSION)

# Use Google Trends API (tz=360 corresponds to US CST per pytrends convention).
pytrends = TrendReq(hl='en-US', tz=360)

# Bundle every API client behind one object for the views below to use.
all_features = AllFeatures(client, pytrends, natural_language_understanding)


# Go to the analyze view
def analyze(request):
    """Render the main analysis landing page."""
    template = 'analyze.html'
    return render(request, template)


# Go to the text analyze view
def analyze_text(request):
    """Render the text-analysis page."""
    template = 'analyze-text.html'
    return render(request, template)

Beispiel #7
0
import pandas as pd
import pytrends
import seaborn as sns
import matplotlib.pyplot as plt
from pytrends.request import TrendReq

pytrend = TrendReq(hl='nl')  # Dutch-language Google Trends session

KEYWORDS = ['Asperge', 'Spruitjes', 'Pompoen']
# Take the first (top) suggestion for each keyword to get its topic "mid" code.
KEYWORDS_CODES = [pytrend.suggestions(keyword=i)[0] for i in KEYWORDS]
df_CODES = pd.DataFrame(KEYWORDS_CODES)
# print(df_CODES)

EXACT_KEYWORDS = df_CODES['mid'].to_list()
DATE_INTERVAL = '2020-01-01 2021-01-01'
COUNTRY = [
    "GB", "DE", "NL"
]  # Use this link for iso country code: https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
CATEGORY = 0  # Use this link to select categories: https://github.com/pat310/google-trends-api/wiki/Google-Trends-Categories
SEARCH_TYPE = ''  # default is 'web searches'; others include 'images','news','youtube','froogle' (google shopping)

# Chunk EXACT_KEYWORDS into sublists of length 1 (one keyword per request).
Individual_EXACT_KEYWORD = list(zip(*[iter(EXACT_KEYWORDS)] * 1))
Individual_EXACT_KEYWORD = [list(x) for x in Individual_EXACT_KEYWORD]
dicti = {}
i = 1
for Country in COUNTRY:
    for keyword in Individual_EXACT_KEYWORD:
        pytrend.build_payload(kw_list=keyword,
                              timeframe=DATE_INTERVAL,
                              geo=Country,
                              cat=CATEGORY,
Beispiel #8
0
def start():
    """Fetch Google Trends data for all languages and frameworks.

    Reads credentials from the GOOGLE_ACCOUNT / GOOGLE_PASSWORD environment
    variables, then queries interest-over-time and interest-by-region for
    every Language and LibraryOrFramework model instance, in batches of 5
    (the Google Trends per-request keyword limit).
    """
    GOOGLE_USR = ""
    GOOGLE_PWD = ""
    treq = None
    try:
        GOOGLE_USR = os.environ['GOOGLE_ACCOUNT']
        GOOGLE_PWD = os.environ['GOOGLE_PASSWORD']
    except Exception as ex:
        # BUGFIX: "str + exception" raised TypeError; stringify explicitly.
        print("GOOGLE_ACCOUNT and GOOGLE_PASSWORD not set. " + str(ex))

    try:
        treq = TrendReq(GOOGLE_USR, GOOGLE_PWD)
    except Exception as ex:
        print("Username or password not valid. " + str(ex))
    print("Connected")

    def _fetch_batches(names, populate_time, populate_region):
        """Query Trends for *names* in batches of 5 and store both views.

        BUGFIX: the original sliced names[i:i + 4] with step 5 (skipping every
        fifth name) and indexed the leftover with names[-k] (a single string)
        instead of slicing names[-k:]. Stepping by 5 with a 5-wide slice
        covers every name, including the final partial batch.
        """
        for i in range(0, len(names), 5):
            batch = names[i:i + 5]
            try:
                print("Fetch for " + ", ".join(batch))
                treq.build_payload(kw_list=batch)
                populate_time(treq.interest_over_time())
                populate_region(treq.interest_by_region())
            except Exception as ex:
                # Best effort: a failed batch must not stop the rest.
                print(ex)
                continue

    # LANGUAGES
    all_languages = [i.name for i in models.Language.objects.all()]
    _fetch_batches(all_languages, populate_iot, populate_ior)
    print("Interest over time language completed.")

    # FRAMEWORKS
    print("Interest over time framework started.")
    all_fw = [i.name for i in models.LibraryOrFramework.objects.all()]
    _fetch_batches(all_fw, populate_iot_fw, populate_ior_fw)
    print("Interest over time framework completed.")
Beispiel #9
0
import os, requests, time, operator
from pytrends.request import TrendReq
import plotly.express as px
import pymysql.cursors
from sqlalchemy import create_engine, exc

## TODO: Need to find fix so that I can find 5 related terms at a time (fewer requests)
## TODO: Focus on specific categories for more actionable insights?
## TODO: Decide if I want to keep the GEO for both functions as Canada
## TODO: Should look into  "['link' 'value'] not found in axis" error during related topics lookup

# Only needs to run once - all requests use this session.
# tz is the timezone offset in minutes (pytrends convention; 360 = US CST).
# NOTE(review): original author was unsure whether the sign should be +240
# or -240 — confirm against pytrends docs.
pytrends = TrendReq(
    hl='en-US',
    tz=-240,
    retries=2,
    backoff_factor=0.2,
)

# Connect to the database.
# NOTE(review): `password` must be defined elsewhere (not visible in this chunk).
connection = pymysql.connect(host='localhost',
                             user='******',
                             password=password,
                             db='trends',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# create sqlalchemy engine
engine = create_engine("mysql+pymysql://{user}:{pw}@localhost/{db}".format(
    user="******", pw=password, db="trends"))
Beispiel #10
0
 def test_trending_searches(self):
     """trending_searches() should yield a result once a payload is built."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     result = client.trending_searches()
     self.assertIsNotNone(result)
Beispiel #11
0
 def test_interest_by_region(self):
     """interest_by_region() should yield a result once a payload is built."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     result = client.interest_by_region()
     self.assertIsNotNone(result)
Beispiel #12
0
 def test_suggestions(self):
     """suggestions() should return candidates for a known keyword."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     result = client.suggestions(keyword='pizza')
     self.assertIsNotNone(result)
Beispiel #13
0
	def get(self, request, *args, **kwargs):
		"""Render the top-trends page: world hot trends mapped onto countries."""
		context = self.get_context_data()

		# Google hot-trends country id -> (display name, ISO 3166 alpha-2 code).
		# NOTE(review): id "46" maps "China" to "CH", which is Switzerland's
		# ISO code (China is "CN") — preserved as-is; confirm against the API.
		country_data = {
			"40": ("South Africa", "ZA"),
			"15": ("Germany", "DE"),
			"36": ("Saudi Arabia", "SA"),
			"30": ("Argentina", "AR"),
			"8": ("Australia", "AU"),
			"44": ("Austria", "AT"),
			"41": ("Belgium", "BE"),
			"18": ("Brazil", "BR"),
			"13": ("Canada", "CA"),
			"38": ("Chile", "CL"),
			"32": ("Colombia", "CO"),
			"23": ("South Korea", "KR"),
			"49": ("Denmark", "DK"),
			"29": ("Egypt", "EG"),
			"26": ("Spain", "ES"),
			"1": ("United States", "US"),
			"50": ("Finland", "FI"),
			"16": ("France", "FR"),
			"48": ("Greece", "GR"),
			"10": ("Hong Kong", "HK"),
			"45": ("Hungary", "HU"),
			"3": ("India", "IN"),
			"19": ("Indonesia", "ID"),
			"6": ("Israel", "IL"),
			"27": ("Italy", "IT"),
			"4": ("Japan", "JP"),
			"37": ("Kenya", "KE"),
			"34": ("Malaysia", "MY"),
			"21": ("Mexico", "MX"),
			"52": ("Nigeria", "NG"),
			"51": ("Norway", "NO"),
			"17": ("Netherlands", "NL"),
			"25": ("Philippines", "PH"),
			"31": ("Poland", "PL"),
			"47": ("Portugal", "PT"),
			"43": ("Czech Republic", "CZ"),
			"39": ("Romania", "RO"),
			"9": ("United Kingdom", "GB"),  # BUGFIX: was misspelled "United Kingdon"
			"14": ("Russia", "RU"),
			"5": ("Singapore", "SG"),
			"42": ("Sweden", "SE"),
			"46": ("China", "CH"),
			"12": ("Taiwan", "TW"),
			"33": ("Thailand", "TH"),
			"24": ("Turkey", "TR"),
			"35": ("Ukraine", "UA"),
			"28": ("Vietnam", "VN"),
		}
		COUNTRIES = {
			cid: {"name": name, "postal": postal}
			for cid, (name, postal) in country_data.items()
		}

		# hottrends() is keyed by the same country ids; attach each country's
		# trending terms to its entry.
		pytrends = TrendReq(settings.GOOGLE_ACCT, settings.GOOGLE_PW)
		hottrendslist = pytrends.hottrends({'geo': 'World'})
		for country_id, trend in hottrendslist.items():
			COUNTRIES[country_id]['trending'] = trend

		context['countries'] = COUNTRIES

		return render(request, 'toptrends/main.html', context)
Beispiel #14
0
 def test_top_charts(self):
     """top_charts() should return chart data for a known chart id/date."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     charts = client.top_charts(cid='actors', date=201611)
     self.assertIsNotNone(charts)
Beispiel #15
0
 def test_trending_searches(self):
     """trending_searches() with an explicit pn should return a result."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     result = client.trending_searches(pn='p1')
     self.assertIsNotNone(result)
Beispiel #16
0
 def test_related_queries(self):
     """related_queries() should yield a result once a payload is built."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     result = client.related_queries()
     self.assertIsNotNone(result)
Beispiel #17
0
 def test_interest_over_time_froogle(self):
     """interest_over_time() should work with the 'froogle' search property."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'], gprop='froogle')
     result = client.interest_over_time()
     self.assertIsNotNone(result)
Beispiel #18
0
# The maximum for a timeframe for which we get daily data is 270.
# Therefore we could go back 269 days. However, since there might
# be issues when rescaling, e.g. zero entries, we should have an
# overlap that does not consist of only one period.
# NOTE(review): the original text claimed "step size 250 ... 19 periods for
# overlap", but the code below computes step = 269 - 40 + 1 = 230 with a
# 40-period overlap — confirm which values were intended.
maxstep = 269
overlap = 40
step    = maxstep - overlap + 1
kw_list = [' ']  # NOTE(review): placeholder keyword (single space) — supply a real term
start_date = datetime(2011, 12, 9).date()  # earliest date the walk-back should reach


## FIRST RUN ##

# Login to Google. Only need to run this once, the rest of requests will use the same session.
pytrend = TrendReq()

# Run the first time (if we want to start from today, otherwise we need to ask for an end_date as well
today = datetime.today().date()
old_date = today

# Go back in time
new_date = today - timedelta(days=step)

# Create new timeframe for which we download data
timeframe = new_date.strftime('%Y-%m-%d')+' '+old_date.strftime('%Y-%m-%d')
pytrend.build_payload(kw_list=kw_list, timeframe = timeframe)
interest_over_time_df = pytrend.interest_over_time()

## RUN ITERATIONS
## RUN ITERATIONS
Beispiel #19
0
 def test_interest_by_region_city_resolution(self):
     """interest_by_region() should accept CITY resolution."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     result = client.interest_by_region(resolution='CITY')
     self.assertIsNotNone(result)
Beispiel #20
0
 def test_build_payload(self):
     """Building a payload must populate the token_payload widgets."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     self.assertIsNotNone(client.token_payload)
Beispiel #21
0
# Header row for the snapshot CSV: "What", the 20 trend slots, then Date/Hour.
heado = [str(x).strip() for x in range(0, 20)]
heado.insert(0, "What")
heado.append("Date")
heado.append("Hour")

csv_path = 'data/Aus_google_trends.csv'
old_df = pd.read_csv(csv_path)

# Rows already recorded for today's date.
current_day = old_df[old_df['Date'] == bris_reverse_date]

# Skip the scrape if this hour has already been captured.
if int(bris_hour) in current_day['Hour'].values.tolist():
    print("Nup")
else:
    pytrend = TrendReq(hl='en-US', tz=360)

    df = pytrend.trending_searches(pn='australia')
    df = df.rename(columns={0: "Google trending searches"})

    # One row per snapshot: transpose so the trends become columns, then
    # stamp the row with the Brisbane date and hour.
    melted = df.T.reset_index()
    melted['Date'] = bris_reverse_date
    melted['Hour'] = bris_hour
    melted.columns = heado

    # BUGFIX: DataFrame.append was deprecated and removed in pandas 2.0;
    # pd.concat is the supported equivalent.
    old_df = pd.concat([old_df, melted])

    print(old_df)

    with open(csv_path, "w") as f:
        old_df.to_csv(f, index=False)
Beispiel #22
0
def graph(request):
    """Render a Google Trends graph for one or two comma-separated keywords.

    Query params:
        q -- a keyword, or two keywords separated by a comma
        y -- range selector: 'year' / 'month'; anything else means last 7 days

    Redirects to the index when no keyword is supplied.
    """
    words = request.GET.get('q')
    if not words:
        return redirect('tot:index')

    timeframe = _resolve_timeframe(request.GET.get('y'))
    pytrends = TrendReq()
    # pytrends = TrendReq(hl='ko', tz=540)

    if ',' not in words:
        word1 = words
        word2 = None
        # BUGFIX: build_payload expects a list of keywords; the original
        # passed the bare string, making pytrends treat each CHARACTER as a
        # separate keyword.
        kw_list = [word1]
    else:
        parts = words.split(',')
        word1, word2 = parts[0], parts[1]
        kw_list = [word1, word2]

    pytrends.build_payload(kw_list, cat=0, timeframe=timeframe, geo='', gprop='')
    value = pytrends.interest_over_time()
    del value['isPartial']
    value = value.reset_index()
    payload = json.loads(value.to_json(
        force_ascii=False, orient='split', date_format='iso', date_unit='s'))

    ab = []
    cd = []
    for row in payload['data']:
        stamp = datetime.strptime(row[0], '%Y-%m-%dT%H:%M:%SZ')
        label = stamp.strftime('%Y-%m-%d %H:%M:%S')
        ab.append({'label': label, 'y': row[1], 'link': '/anal'})
        if word2 is not None:
            cd.append({'label': label, 'y': row[2], 'link': '/anal'})

    # BUGFIX: context was previously assigned inside the loop, raising an
    # UnboundLocalError whenever Trends returned no data points.
    if word2 is not None:
        context = {'ab': ab, 'cd': cd, 'word1': word1, 'word2': word2}
    else:
        context = {'ab': ab, 'word1': word1}
    return render(request, 'tot/graph.html', context)


def _resolve_timeframe(selector):
    """Map the 'y' query value onto a pytrends timeframe string."""
    if selector == 'year':
        return 'today 12-m'
    if selector == 'month':
        return 'today 1-m'
    return 'now 7-d'
def main():
    """Fetch Google Trends data for "Steelcase" and export each view to CSV.

    Exports: overall interest over time, hourly interest for a short window,
    interest by region, and rising/top related topics.
    """
    # Set up the API wrapper (tz=360 corresponds to US CST per pytrends).
    pytrends = TrendReq(hl='en-US', tz=360)

    # Google Trends allows at most 5 keywords per payload.
    kw_list = ["Steelcase"]

    # Build pipeline
    pytrends.build_payload(kw_list, cat=0, timeframe='all', geo='', gprop='')

    # Get overall interest over the entire timeline
    interestDF = pytrends.interest_over_time()
    interestDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrends5YearInterest_test.csv",
        index=True)
    print(interestDF.head())
    print()

    # Hourly interest over the time set; sleep=60 between requests prevents
    # being rate limited.
    hourlyDF = pytrends.get_historical_interest(kw_list,
                                                year_start=2019,
                                                month_start=7,
                                                day_start=1,
                                                hour_start=0,
                                                year_end=2019,
                                                month_end=7,
                                                day_end=1,
                                                hour_end=1,
                                                cat=0,
                                                geo='',
                                                gprop='',
                                                sleep=60)
    hourlyDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrends5YearHourlyInterest_test.csv",
        index=True)
    print(hourlyDF.head())
    print()

    # Regional interest across the world (resolution can be state/city too).
    regionDF = pytrends.interest_by_region(resolution='COUNTRY',
                                           inc_low_vol=True,
                                           inc_geo_code=False)
    regionDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrendsRegionInterest_test.csv",
        index=True)
    print(regionDF.head())
    print()

    # FIX: fetch related_topics() only once — the original called it twice,
    # issuing two identical network requests against a rate-limited API.
    steelcase_topics = pytrends.related_topics().get('Steelcase')
    risingDF = steelcase_topics.get('rising')
    risingDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrendsRisingRelated_test.csv",
        index=True)
    topDF = steelcase_topics.get('top')
    topDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrendsTopRelated_test.csv",
        index=True)
    print(risingDF.head())
    print()
    print(topDF.head())
Beispiel #24
0
#from pandas.io.json.json_normalize import nested_to_record
#!-*- coding: utf8 -*-
import pandas as pd
from pytrends.request import TrendReq
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
'''pytrends = TrendReq()
pytrends.build_payload(['suicide','自杀'],cat=0,timeframe='2003-01-01 2015-12-30',geo='',gprop='')
print(pytrends.interest_over_time())'''
# Shared session for every request made by the class below.
pytrends = TrendReq()
# The term "suicide" in English, simplified Chinese and traditional Chinese.
kw_list = ["suicide", "自杀", "自殺"]


class google_trends:
    def collect(self, period, timestr):
        result = [0, 0, 0]
        for i, word in enumerate(kw_list):
            pytrends.build_payload([word],
                                   cat=0,
                                   timeframe=period,
                                   geo='HK',
                                   gprop='')
            df = pytrends.interest_over_time()
            print(df)
            result[i] = df[word].values.reshape(-1, 1).tolist()
            print(result[i])
            df.to_csv('./test%s.%s.csv' % (timestr, word), index=True)
            data = pd.read_csv('./test%s.%s.csv' % (timestr, word))
            x = data.iloc[:, 0]
            y = df.iloc[:, 0] / 100
# pytrends UI language and timezone offset (360 = US CST per pytrends docs).
language = 'en-US'
timezone = 360

# NOTE(review): `args`, `username` and `password` are defined outside this chunk.
query_terms = args.terms

# time span for search
# times = 'YYYY-MM-DD YYYY-MM-DD', where the first substring is the start
# date and the second substring is the end date
times = " ".join([args.start, args.end])

# geographic details
# NOTE(review): `location` is never used below — geo='US' is hard-coded in
# build_payload; consider passing geo=location.
location = 'US'

# establish connection
query = TrendReq(username,
                 password,
                 hl=language,
                 tz=timezone,
                 custom_useragent=None)

# query setup
query.build_payload(query_terms, cat=0, timeframe=times, geo='US', gprop='')

# request to pandas table
data = query.interest_over_time()

print(data.tail())

data.to_csv('../data/google/{}.{}.csv'.format('_'.join(query_terms), times))
Beispiel #26
0
from pytrends.request import TrendReq
import matplotlib.pyplot as plt
import os

# Search keywords and time window.
keyword1 = "apple iphone"
keyword2 = "samsung galaxy"
period = "today 5-y"  # search window: the last 5 years

# Connect to Google Trends and load the data.
trend_obj = TrendReq()
trend_obj.build_payload(kw_list=[keyword1, keyword2],
                        timeframe=period)  # kw_list: at most 5 keywords
trend_df = trend_obj.interest_over_time()

# Plot both interest series on one chart.
plt.style.use("ggplot")
plt.figure(figsize=(14, 5))
trend_df[keyword1].plot()
trend_df[keyword2].plot()
plt.title("Google Trends: %s vs. %s" % (keyword1, keyword2), size=15)
plt.legend(loc="best")

# Save the chart as a PNG under ./output.
cwd = os.getcwd()
output_filepath = os.path.join(
    cwd, "output", 'google_trend_%s_vs_%s.png' % (keyword1, keyword2))
plt.savefig(output_filepath, dpi=300)
plt.show()
Beispiel #27
0
from pytrends.request import TrendReq

kw_list = ["asuka", "asuka langley", "asuka evangelion"]
# timeout=(connect, read) seconds for the underlying requests session.
trends = TrendReq(hl='en-US', tz=360, timeout=(10, 25))
trends.build_payload(kw_list, cat=0, timeframe='now 1-d', geo='', gprop='')
df = trends.interest_over_time()
# BUGFIX: print the fetched DataFrame; the original printed the TrendReq
# object itself, leaving `df` unused.
print(df)
Beispiel #28
0
from pytrends.request import TrendReq
from addon import *

# Shared pytrends session (tz=360 corresponds to US CST per pytrends).
pytrends = TrendReq(hl='en-US', tz=360)

# Interest for "covid symptoms" over Feb–Oct 2020, worldwide (geo='').
kw_list = ["covid symptoms"]
pytrends.build_payload(kw_list,
                       cat=0,
                       timeframe='2020-02-01 2020-10-09',
                       geo='',
                       gprop='')
# print(interest_by_city(pytrends))
print(pytrends.interest_over_time())
print(pytrends.interest_by_region(resolution="COUNTRY"))
# Remaining banking-related search terms.
# NOTE(review): kw_1 .. kw_15 are defined above, outside this chunk.
kw_16 = 'savings association'
kw_17 = 'deposit money order'
kw_18 = 'deposit check'
kw_19 = 'best bank accounts'
kw_20 = 'small business bank'

# Group the keywords accordingly (5 per request, the Google Trends limit).
kw_list_1 = [kw_1, kw_2, kw_3, kw_4, kw_5]
kw_list_2 = [kw_6, kw_7, kw_8, kw_9, kw_10]
kw_list_3 = [kw_11, kw_12, kw_13, kw_14, kw_15]
kw_list_4 = [kw_16, kw_17, kw_18, kw_19, kw_20]

kw_list = [kw_list_2, kw_list_3, kw_list_4]

# Create the master google trend keyword list using kw_list_1
pytrend = TrendReq(hl='en-US')

pytrend.build_payload(kw_list_1, cat=0, timeframe='all', geo='US', gprop='')

df_google = pytrend.interest_over_time()

# Annual ('A') averages; re-index by the year number for joining later.
df_google_annual = df_google.resample('A').mean()

df_google_annual.set_index(df_google_annual.index.year, inplace=True)
# Iterate through the remaining lists in kw_list and join accordingly
for item in kw_list:
    pytrend = TrendReq(hl='en-US')

    pytrend.build_payload(item, cat=0, timeframe='all', geo='US', gprop='')
Beispiel #30
0
from helpers import get_pretty_json_string, shell

from tgbich import run_tgbich
from ircbich import ircbich_init_and_loop

functions = {'tg': run_tgbich, 'irc': ircbich_init_and_loop}

print(f"{__file__}, {__name__}: starting")

from multiprocessing import Process
import os

print(f"{__file__}, {__name__}: pytrends: processing Trend Requests")
# Retry loop: keep trying to create the pytrends session until it succeeds.
# NOTE(review): `traceback` and `time` must be imported elsewhere in this file.
while True:
    try:
        pytrends = TrendReq(hl='ru-RU', tz=360)
        break
    except KeyboardInterrupt as e:
        # Let Ctrl-C abort the retry loop instead of being swallowed below.
        raise e
    except:
        # Deliberate best-effort: log whatever failed and retry after a pause.
        traceback.print_exc()
        TIME_TO_SLEEP_SECONDS = 1
        print("sleeping %s seconds" % str(TIME_TO_SLEEP_SECONDS))
        time.sleep(TIME_TO_SLEEP_SECONDS)
        continue
print(f"{__file__}, {__name__}: pytrends: completed.")


# launch processes
def launch_all():
    print("processing configs")
def main():
    """Scrape trending words, detect Google Trends spikes, and notify users.

    Flow: authenticate with Firebase, scrape trending-word rankings from
    several Japanese portals (Nifty, d-menu, Twipple) with headless Chrome,
    then for each user's product word list query Google Trends; words whose
    daily interest jumped by 40+ points, or that appear in the scraped
    rankings, are emailed to that user.
    """
    cred = credentials.Certificate(
        "demand-forecast-by-hw-firebase-adminsdk-zbrw8-240f7687b3.json"
    )  # the downloaded Firebase private key
    ############################################################################################################################
    firebase_admin.initialize_app(
        cred)  # authentication step; after the first run comment this out (fine to keep when running locally)
    ############################################################################################################################
    # Boilerplate: headless Chrome setup ############
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome('chromedriver', options=options)
    driver.implicitly_wait(10)
    ################################################

    ratios = []  # trending words scraped from all portals

    ############################################################
    # Nifty ranking pages
    name = "http://search.nifty.com/shun/ranking.htm"
    driver.get(name)
    driver.implicitly_wait(10)
    html = driver.page_source.encode('utf-8')
    soup = BeautifulSoup(html, "html.parser")
    all_Ratio = soup.find_all("dd", class_="title")
    for ul_tag in all_Ratio:
        for span in ul_tag.find_all('a'):
            ratios.append(span.get_text())

    for i in range(5):
        name = "http://search.nifty.com/shun/ranking" + str(i + 1) + ".htm"
        driver.get(name)
        driver.implicitly_wait(10)
        html = driver.page_source.encode('utf-8')
        soup = BeautifulSoup(html, "html.parser")
        all_Ratio = soup.find_all("dd", class_="title")
        for ul_tag in all_Ratio:
            for span in ul_tag.find_all('a'):
                ratios.append(span.get_text())
    ###########################################################
    # d-menu (docomo) search page
    name = "https://search.smt.docomo.ne.jp/result"
    driver.get(name)
    driver.implicitly_wait(10)
    html = driver.page_source.encode('utf-8')
    soup = BeautifulSoup(html, "html.parser")
    all_Ratio = soup.find_all("div", class_="swiper-slide")
    for ul_tag in all_Ratio:
        for span in ul_tag.find_all('a'):
            ratios.append(span.get_text())
    ###########################################################
    # Twitter (Twipple) daily hot words
    name = "https://tr.twipple.jp/hotword/today.html"
    driver.get(name)
    driver.implicitly_wait(10)
    html = driver.page_source.encode('utf-8')
    soup = BeautifulSoup(html, "html.parser")
    all_Ratio = soup.find_all("div", class_="rankTtl")
    for ul_tag in all_Ratio:
        for span in ul_tag.find_all('a'):
            ratios.append(span.get_text())
    ###########################################################
    # BUGFIX: `driver.close` (no parentheses) only referenced the method and
    # never closed the browser.
    driver.close()

    email, product = read_datebase()
    for index, word_list in enumerate(product, start=0):
        ###############################################################
        pytrends = TrendReq(hl='en-US', tz=360)
        google_maillist = []  # words whose Google Trends interest spiked

        for word in word_list:
            # API
            kw_list = [word]
            print(kw_list)
            pytrends.build_payload(
                kw_list, cat=0, timeframe='today 1-m', geo='JP',
                gprop='')  # 1, 7 days or 1, 3, 12 months only
            df = pytrends.interest_over_time()  # build the dataset
            list_date = df.index.to_list()  # dates as a plain list
            list_sample = []
            dff = df.diff(1)  # day-over-day change in interest
            list_sample = dff[str(word)].to_list()
            a = list_sample[-1:]
            # Treat a day-over-day jump of 40+ points as a spike worth mailing.
            if float(*a) >= 40:
                google_maillist.append(word)

            send_datebase(str(word), list_sample[1:])
        state = [list_date[1], *list_date[-1:]]
        send_datetime(state)
        ###########################################################
        # Words that also appear in the scraped portal rankings.
        mail_word = []
        for word in word_list:
            for ratio in ratios:
                if word in ratio:
                    mail_word.append(word)
        mail_word += google_maillist
        mail = numpy.unique(mail_word).tolist()  # dedupe before mailing
        print(str(email[index]) + ":" + str(mail))
        if (str(mail) != str("[]")):
            send_mail(str(email[index]), str(mail))
Beispiel #32
0
import collect  # import local collect file
from pytrends.request import TrendReq

if __name__ == "__main__":
    # Only need to run this once, the rest of requests will use the same session.
    pytrend = TrendReq()
    kw_list = ['aws', 'azure']
    # Create payload and capture API tokens. Only needed for interest_over_time(), interest_by_region() & related_queries()
    pytrend.build_payload(kw_list=kw_list)

    interest_over_time_df = collect.get_interest_over_time(pytrend, kw_list)

    interest_by_region_df = collect.get_interest_by_region(pytrend, kw_list)

    related_queries_dict = collect.get_related_queries(pytrend, kw_list)

    trending_searches_df = collect.get_trending_searches(pytrend)

    # NOTE(review): unlike the calls above, no session is passed here —
    # confirm collect.get_today_searches() really takes no arguments.
    today_searches_df = collect.get_today_searches()

    year = 2018
    geo = 'DE'  # GLOBAL or two letter country shortcut (e.g. DE, FR, US)
    language = 'en-US'
    timezone = 0
    top_charts_df = collect.get_top_charts(year, geo, language, timezone)

    kw = 'azure'
    suggestion_dict = collect.get_suggestions(kw)

    historical_interest_df = collect.get_historical_interest(pytrend, kw_list)
from IPython import get_ipython
get_ipython().magic('reset -sf')
import datetime
from datetime import datetime as dt
from datetime import date
import os
import pathlib
import colorlover as cl
import plotly.graph_objs as go
import chart_studio.plotly as py
import plotly.express as px
import pandas as pd
from pytrends.request import TrendReq

APP_PATH = str(pathlib.Path(__file__).parent.resolve())

# Shared Google Trends session; retry twice with a short backoff on failure.
pytrends = TrendReq(hl='en-US', tz=360, retries=2, backoff_factor=0.1)
#%% [markdown]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Bat soup theory~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
today = datetime.date(2020, 4, 26)
search_time = '2020-01-01 ' + str(today)
searches_bat = [
    'bat soup',
    'coronavirus bat soup',
    'china bat soup',
    'chinese bat soup',
    'wuhan bat soup',
    'bat soup virus',
]
# One keyword per request group (equivalent to chunking the list in ones).
groupkeywords = [[term] for term in searches_bat]
# Download search interest of bat key words
Beispiel #34
0
def get_pytrend():
    """Return a ``TrendReq`` client for scraping Google Trends data.

    The client uses the English (US) locale and a timezone offset of
    360 minutes (US CST).
    """
    return TrendReq(hl='en-US', tz=360)
Beispiel #35
0
 def test_build_payload(self):
     """build_payload() should populate the widget token payload."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     self.assertIsNotNone(client.token_payload)
Beispiel #36
0
import pandas as pd
from pytrends.request import TrendReq

# Movie metadata previously assembled into 14_final.csv.
final = pd.read_csv('./csv/14_final.csv')

# Example of the explore URL this replicates programmatically:
# date=2015-02-01%202015-03-01&geo=KR&q=%ED%82%B9%EC%8A%A4%EB%A7%A8

# NOTE(review): `base` is never referenced in the visible code below --
# presumably used further down or left over from a manual-URL approach.
base = 'https://trends.google.co.kr/trends/explore?cat=34&'

pytrend = TrendReq()

# Query Google Trends (KR, category 34 = movies) for each film over the
# window from its release date to its day-15 date.
for index, row in final.iterrows():
    movie_name = getattr(row, 'movie_name')
    nation = getattr(row, 'nation')
    distribution = getattr(row, 'distribution')
    director_average = getattr(row, 'director_average')
    actor_sum = getattr(row, 'actor_sum')
    rate = getattr(row, 'rate')
    genre = getattr(row, 'genre')
    release_date = getattr(row, 'release_date')
    day1_screen = getattr(row, 'day1_screen')
    day1_audience = getattr(row, 'day1_audience')
    # NOTE(review): the day14_* variables read the day15_* columns --
    # confirm whether this naming mismatch is intentional.
    day14_date = getattr(row, 'day15_date')
    day14_audience = getattr(row, 'day15_audience')

    # Second keyword is the Korean word for "movie", used as a baseline.
    pytrend.build_payload(kw_list=[movie_name, '영화'],
                          timeframe='%s %s' % (release_date, day14_date),
                          geo='KR',
                          cat='34')

    interest_time_over_time = pytrend.interest_over_time()
Beispiel #37
0
 def test_interest_over_time_bad_gprop(self):
     """An unrecognised gprop value must raise ValueError."""
     client = TrendReq()
     with self.assertRaises(ValueError):
         client.build_payload(kw_list=['pizza', 'bagel'], gprop=' ')
Beispiel #38
0
def getgoogledata():
    """Collect Google Trends interest for political-risk keywords per country,
    seasonally correct and weight them, and store the resulting indices in the
    Django ``Data`` model.

    NOTE(review): relies on module-level names imported elsewhere in this file
    (TrendReq, pd, datetime, time, random, seasonal_decompose, Data).
    """

    languages = ['Eng', 'Ger', 'Esp']
    # Index 0 ('Overview') is unused below; indices 1-3 label the three
    # weighted categories when rows are written to the database.
    categories = ['Overview', 'Regulation', 'Sanctions', 'Situation']

    # Country name -> ISO country code, grouped by the language of the
    # search terms used for that country.
    dictctry = {
        'Eng': {
            'Canada': "CA",
            'Australia': "AU",
            'India': "IN",
            'Pakistan': "PK",
            'Ireland': "IE",
            'United States': "US",
            'New Zealand': "NZ",
            'South Africa': "ZA",
            'Singapore': "SG",
            "Sierra Leone": "SL",
            "Liberia": "LR",
            "Ghana": "GH",
            "Nigeria": "NG",
            "Cameroon": "CM",
            "Ethiopia": "ET",
            "Uganda": "UG",
            "Rwanda": "RW",
            "Kenya": "KE",
            "Tanzania": "TZ",
            "Malawi": "MW",
            "Zambia": "ZM",
            "Zimbabwe": "ZW",
            "Botswana": "BW",
            "United Kingdom": "GB",
            "Sudan": "SD",
            "South Sudan": "SS",
            # NOTE(review): "Nambia" looks like a typo for Namibia; the key is
            # a runtime value used as a DataFrame column, so left unchanged.
            "Nambia": "NA"
        },  #https://de.m.wikipedia.org/wiki/Datei:Official_languages_in_Africa.svg
        'Ger': {
            'Switzerland': "CH",
            'Austria': "AT",
            'Germany': "DE"
        },
        'Esp': {
            'Bolivia': "BO",
            'Uruguay': "UY",
            'Paraguay': "PY",
            'Venezuela': "VE",
            'Colombia': "CO",
            'Ecuador': "EC",
            'Peru': "PE",
            'Chile': "CL",
            'Argentina': "AR",
            'Mexico': "MX",
            'Spain': 'ES'
        }
    }

    # Flat list of every country name across all languages.
    ctrylist = []
    for lang in languages:
        ctrylist += list(dictctry[lang])

    # Category weights; they sum to 94, and the final index is divided by
    # that sum below.
    weights = {
        'Political Regulation': 16,
        'Political Sanctions': 33,
        'Political Situation': 45
    }

    # create dictionary with relevant keywords
    kw_dict = {
        'Eng': {
            'Political Regulation': [
                "bureaucracy", "Environmental protection", "Regulations",
                "taxes", "property rights", 'expropriation'
            ],
            'Political Sanctions': [
                "exchange", "foreign investment", "quotas", 'restrictions',
                'subsidies', 'tariffs', "transferability", "sanctions"
            ],
            'Political Situation': [
                "central bank", "corruption", "instability", "judiciary",
                "nationalization", "protectionism", "revolt",
                'social conflict', "strike", "terrorism", "war"
            ],
            'Corruption': ['corruption']
        },
        'Ger': {
            'Political Regulation': [
                "Bürokratie", "Umweltschutz", "Regulierung", "Steuern",
                "Urheberrecht", 'Enteignung'
            ],
            'Political Sanctions': [
                "Transaktionen", "foreign investment", "Einfuhrzoll",
                'Beschränkungen', 'Subventionen', "Sanktionen"
            ],
            'Political Situation': [
                "Zentralbank", "Korruption", "Instabilität", "Justiz",
                "Nationalismus", "Protektionismus", "Aufstände",
                'Staatsgewalt', "Streik", "Terrorismus", "Krieg"
            ],
            'Corruption': ['Korruption']
        },
        'Esp': {
            'Political Regulation': [
                "burocracia", "protección del medio ambiente", "reglamento",
                "impuesto", "regulaciones"
            ],
            'Political Sanctions': [
                "sanciones", "restricciones", "convertibilidad", "cuotas",
                "subsidio", "tarifas", 'cambiario', 'inversion extranjera',
                'expropiación', "derecho de propiedad"
            ],
            'Political Situation': [
                "judicial", "situación política", "corrupcion",
                "conflicto social", "revuelta", "inestabilidad",
                "nacionalismo", 'gobierno militar', "proteccionismo", "huelga",
                "terrorismo", "guerra", "banco central"
            ],
            'Corruption': ['corrupcion']
        }
    }

    # start pytrend and create DataFrame for all country data
    pytrends = TrendReq(hl='en-EN', tz=360, timeout=(100))

    #dryrun pytrend - with manually fixed first search term and geo
    pytrends.build_payload(
        ['bureaucracy'],
        cat=0,
        timeframe=
        f'2006-01-01 {datetime.datetime.today().strftime("%Y-%m-%d")}',
        geo='CA',
        gprop='')  #build data extraction references
    dry_df = pytrends.interest_over_time()
    # NOTE(review): pytrends returns 'isPartial' as a boolean column, so the
    # comparison against the string 'True' likely never matches any row --
    # confirm whether this partial-data filter is intended to be a no-op.
    filt = dry_df[
        'isPartial'] == 'True'  # create filter to get isPartial column
    dry_df.drop(
        index=dry_df[filt].index,
        inplace=True)  # drop all indexes where values are inPartial = True
    dry_df.drop(columns=['isPartial'], inplace=True)  # drop column isPartial

    all_ctys_df = pd.DataFrame(columns=[list(
        dictctry[languages[0]])[0]])  #grab first country name for first column
    weighted_sbpri_df = pd.DataFrame(
        columns=ctrylist)  # create dataframe for weigthed values
    weighted_regulation_df = pd.DataFrame(
        columns=ctrylist)  # create dataframe for weigthed values
    weighted_situation_df = pd.DataFrame(columns=ctrylist)
    weighted_sanctions_df = pd.DataFrame(columns=ctrylist)

    #map zero values to weighted sbpri - this ensure that we can add the weigthed values of each category
    weighted_sbpri_df[ctrylist[0]] = dry_df['bureaucracy'] * 0
    weighted_sbpri_df = weighted_sbpri_df.fillna(0)

    #first loop over Languages
    for lang in languages:

        #Second loop over Categories and Respective Terms for that languages
        for tpc, terms in kw_dict[lang].items():
            all_terms_df = pd.DataFrame(columns=[terms[0]])

            # Third run Pytrend per Country in that language with the respective categories and terms
            for country, abbreviation in dictctry[lang].items():
                whole_df = pd.DataFrame(columns=[terms[0]
                                                 ])  # create empty dataframe
                #             if country not in ['New Zealand', 'Ireland', 'Bolivia', 'Uruguay', 'Paraguay']: #countries where search terms have no data

                for i in terms:  #loop for data collection from google for each individual word in kw_list
                    try:
                        pytrends.build_payload(
                            [i],
                            cat=0,
                            timeframe=
                            f'2006-01-01 {datetime.datetime.today().strftime("%Y-%m-%d")}',
                            geo=abbreviation,
                            gprop='')  #build data extraction references
                        df = pytrends.interest_over_time(
                        )  #create initial dataframe
                        #                     filt = df['isPartial'] == 'True' # create filter to get isPartial column
                        #                     df.drop(index=df[filt].index, inplace=True) # drop all indexes where values are inPartial = True
                        df.drop(columns=['isPartial'],
                                inplace=True)  # drop column isPartial
                        whole_df[i] = df[i]  #add column to whole_df dataframe
                        # random pause to stay under Google's rate limits
                        time.sleep(random.uniform(2, 5))

                        print(country + ' / ' + i)
                    # NOTE(review): bare except silently skips any term that
                    # fails (rate limiting, no data, network errors).
                    except:
                        print("error " + country + ' / ' + i)
                        pass

        # After Thrid Loop Correct data Per Country

        # seasonal correction to each search term since words have different seasonality (trabjar) searched more beginning of year other words much less in the beginning

                delta_df = whole_df
                delta_df.dropna(axis=1, inplace=True)
                # zeros would break the multiplicative decomposition, so
                # replace them with a small positive value first
                delta_df.replace(0, 0.1, inplace=True)
                for i in delta_df.columns:
                    result_mul = seasonal_decompose(delta_df[i],
                                                    model='multiplicative',
                                                    extrapolate_trend='freq')
                    delta_df[i] = (delta_df[i] / result_mul.seasonal)

                # create change deltas for search term !! check if this may ruin data !!
                # min-max rescale each seasonally corrected series to 0-100
                delta_df = delta_df - delta_df.min()
                delta_df = delta_df / delta_df.max() * 100

                #         set delta_df which is delta per word and seasonally corrected as whole_df
                whole_df = delta_df

                # create mean column for every country
                x = whole_df[terms[
                    0]] * 0  # create dataframe with same amount of rows as whole_df and 0 values
                for i in whole_df.columns:
                    x += whole_df[i]
                    if tpc == list(kw_dict[lang])[0]:
                        all_terms_df[i] = whole_df[i]
                x = x / len(whole_df.columns)
                whole_df['Mean'] = x

                # After Data correcting and Mean creation add Categories to weighted df with the necessary weights
                if tpc == list(kw_dict[lang])[0]:
                    weighted_sbpri_df[country] += weights[list(
                        kw_dict[lang])[0]] * whole_df['Mean']
                    weighted_regulation_df[country] = whole_df['Mean']

                if tpc == list(kw_dict[lang])[1]:
                    weighted_sbpri_df[country] += weights[list(
                        kw_dict[lang])[1]] * whole_df['Mean']
                    weighted_sanctions_df[country] = whole_df['Mean']

                if tpc == list(kw_dict[lang])[2]:
                    weighted_sbpri_df[country] += weights[list(
                        kw_dict[lang])[2]] * whole_df['Mean']
                    weighted_situation_df[country] = whole_df['Mean']

    # normalise the weighted sum by the total weight (94)
    weighted_sbpri_df = weighted_sbpri_df.applymap(
        lambda x: x / sum(weights.values()))

    # push new data to database
    Data.objects.all().delete()  # delete data in database

    for country in weighted_sbpri_df:
        for row in weighted_sbpri_df.index:
            Data.objects.create(date=row,
                                country=country,
                                category='SBPRI',
                                value=weighted_sbpri_df[country][row])

    for country in weighted_regulation_df:
        for row in weighted_regulation_df.index:
            Data.objects.create(date=row,
                                country=country,
                                category=categories[1],
                                value=weighted_regulation_df[country][row])

    for country in weighted_sanctions_df:
        for row in weighted_sanctions_df.index:
            Data.objects.create(date=row,
                                country=country,
                                category=categories[2],
                                value=weighted_sanctions_df[country][row])

    for country in weighted_situation_df:
        for row in weighted_situation_df.index:
            Data.objects.create(date=row,
                                country=country,
                                category=categories[3],
                                value=weighted_situation_df[country][row])
Beispiel #39
0
 def test_related_queries(self):
     """related_queries() should return data once a payload is built."""
     client = TrendReq()
     client.build_payload(kw_list=['pizza', 'bagel'])
     self.assertIsNotNone(client.related_queries())
Beispiel #40
0
from pytrends.request import TrendReq

# Single shared session; hl/tz mirror the common US defaults.
trend_client = TrendReq(hl='en-US', tz=360)
keywords = ["Ripple"]
# Last 7 days, all categories, worldwide, web search.
trend_client.build_payload(keywords, cat=0, timeframe='now 7-d', geo='', gprop='')
interest_df = trend_client.interest_over_time()
interest_df['Ripple'].plot(kind='line')
Beispiel #41
0
# Placeholder (masked) credentials for the legacy authenticated API below.
google_username = "******"
google_password = "******"
path = ""

# Timestamp prefix for output filenames, e.g. '200426_1530_'.
today = datetime.datetime.today().strftime('%y%m%d_%H%M_')

# ISO 3166-2 codes for Indian states/territories.
# NOTE(review): unused in the visible code -- presumably consumed further down.
in_regions = ["IN-AN", "IN-AP", "IN-AS", "IN-BR", "IN-CH", 
			  "IN-CT", "IN-DL", "IN-GA", "IN-GJ",
			  "IN-HP", "IN-HR", "IN-JH", "IN-JK", "IN-KA", "IN-KL", 
			  "IN-MH", "IN-MN", "IN-MP",
			  "IN-OR", "IN-PB", "IN-PY", "IN-RJ",  
			  "IN-TG", "IN-TN", "IN-UP", "IN-UT", "IN-WB"]

# connect to Google
# NOTE(review): TrendReq(username, password) and pytrend.trend() belong to a
# very old pytrends API; current releases use TrendReq(hl=..., tz=...) with
# build_payload()/interest_over_time().  Confirm the pinned pytrends version.
pytrend = TrendReq(google_username, google_password)

# India for MRI
trend_payload = {'q': 'mri, sonography, ultrasound', 'geo': "IN"}
print(trend_payload)
trend = pytrend.trend(trend_payload, return_type = 'dataframe')
# random pause between requests to avoid rate limiting
time.sleep(random.uniform(10, 15))
file_name = "data/" + today + "mri_IN_.csv"
trend.to_csv(file_name)


# India for xray
trend_payload = {'q': 'xray, x-ray, sonography, ultrasound', 'geo': "IN"}
print(trend_payload)
trend = pytrend.trend(trend_payload, return_type = 'dataframe')
time.sleep(random.uniform(10, 15))
Beispiel #42
0
from pytrends.request import TrendReq

#connect to Mongo DB Server
# NOTE(review): the MongoClient(...) assignment appears to have been mangled
# into the comment below during extraction -- the stray ')' two lines down is
# its orphaned closing parenthesis, and `client` is used later without ever
# being assigned.  Restore something like:
#     client = MongoClient("mongodb://user:pass@sdgp1-fmfys.mongodb.net/...")
# client =MongoClient("mongodb://*****:*****@sdgp1-fmfys.mongodb.net/whatIsSearchedToday?authSource=admin&replicaSet=sdgp1-shard-0&readPreference=primary&appname=MongoDB%20Compass%20Community&ssl=true'
)

db = client.test  #test connection if connected

#establishing connection with google trends

# one blind retry if the first session creation fails
try:
    pytrend = TrendReq()
except:
    pytrend = TrendReq()
clientdatabaseName = 'whatIsSearchedToday'
today_searches_df = pytrend.today_searches()
db = client[
    clientdatabaseName]  #open database if not available will be created
db.whatissearchedtodays.drop()  #delete collection to have fresh collection
whatIsSearchedTodayCollection = db[
    'whatissearchedtodays']  #new collection for what is searched today
df = pd.DataFrame(today_searches_df)
# keep only the top six trending searches (if that many exist)
try:
    df = df.head(6)
except:
    print("Less Than 6 values")
Beispiel #43
0
 def get_google_trends(self, kw_list, trdays=250, overlap=100, 
                       cat=0, geo='', tz=360, gprop='', hl='en-US',
                       sleeptime=1, isPartial_col=False, 
                       from_start=False, scale_cols=True):
     """Retrieve daily google trends data for a list of search terms
     
     Long date ranges are covered by stitching several overlapping
     searches together and rescaling them against each other.
     Uses self.from_date / self.to_date ('%Y-%m-%d' strings) for the
     overall range and self.ascending for the final sort order.
     
     Parameters
     ----------
     kw_list : list of search terms (max 5)- see pyTrends for more details
     trdays : the number of days to pull data for in a search
         (the max is around 270, though the website seems to indicate 90)
     overlap : the number of overlapped days when stitching two searches together
     cat : category to narrow results - see pyTrends for more details
     geo : two letter country abbreviation (e.g 'US', 'UK') 
         default is '', which returns global results - see pyTrends for more details
     tz : timezone offset
         (default is 360, which corresponds to US CST - see pyTrends for more details)
     gprop : filter results to specific google property
         available options are 'images', 'news', 'youtube' or 'froogle'
         default is '', which refers to web searches - see pyTrends for more details
     hl : language (e.g. 'en-US' (default), 'es') - see pyTrends for more details
     sleeptime : when stitching multiple searches, this sets the period between each
     isPartial_col : keep the isPartial column when True
         (default is False, i.e. the column is removed)
     from_start : when stitching multiple results, this determines whether searches
         are combined going forward or backwards in time
         (default is False, meaning searches are stitched with the most recent first)
     scale_cols : google trend searches traditionally returns scores between 0 and 100
         stitching could produce values greater than 100
         by setting this to True (default), the values will range between 0 and 100
     
     Returns
     -------
     pandas Dataframe
         (on failure: a 1-row DataFrame with a single "error" column)
     
     Notes
     -----
     This method is essentially a highly restricted wrapper for the pytrends package
     Any issues/questions related to its use would probably be more likely resolved
     by consulting the pytrends github page
     https://github.com/GeneralMills/pytrends
     """
     
     if len(kw_list)>5 or len(kw_list)==0:
         raise ValueError("The keyword list can contain at most 5 words")
     if trdays>270:
         raise ValueError("trdays must not exceed 270")
     if overlap>=trdays:
         raise ValueError("Overlap can't exceed search days")
     # step (in days) between the start dates of successive windows
     stich_overlap = trdays - overlap
     from_date = datetime.datetime.strptime(self.from_date, '%Y-%m-%d')
     to_date = datetime.datetime.strptime(self.to_date, '%Y-%m-%d')
     n_days = (to_date - from_date).days
     # launch pytrends request
     _pytrends = TrendReq(hl=hl, tz=tz)
     # get the dates for each search: either one window covering the whole
     # range, or rolling trdays-long windows stepping back from to_date
     if n_days <= trdays:
         trend_dates = [' '.join([self.from_date, self.to_date])]
     else:
         trend_dates = ['{} {}'.format(
         (to_date - datetime.timedelta(i+trdays)).strftime("%Y-%m-%d"),
         (to_date - datetime.timedelta(i)).strftime("%Y-%m-%d")) 
                        for i in range(0,n_days-trdays+stich_overlap,
                                       stich_overlap)]
     if from_start:
         trend_dates = trend_dates[::-1]
     try:
         _pytrends.build_payload(kw_list, cat=cat, timeframe=trend_dates[0], 
                                geo=geo, gprop=gprop)
     except Exception as e:
         return pd.DataFrame({"error":e}, index=[0])
     output = _pytrends.interest_over_time().reset_index()
     if len(output)==0:
         return pd.DataFrame({"error":'search term returned no results (insufficient data)'}, index=[0])
     # stitch the remaining windows onto the accumulated output
     for date in trend_dates[1:]:
         time.sleep(sleeptime)
         try:
             _pytrends.build_payload(kw_list, cat=cat, timeframe=date, 
                                      geo=geo, gprop=gprop)
         except Exception as e:
             return pd.DataFrame({"error":e}, index=[0])
         temp_trend = _pytrends.interest_over_time().reset_index()
         temp_trend = temp_trend.merge(output, on="date", how="left")
         # it's ugly but we'll exploit the common column names
         # and then rename the underscore containing column names
         for kw in kw_list:
             # mean ratio over the overlapping dates rescales the new window
             # ('_x') onto the scale of the accumulated output ('_y')
             norm_factor = np.ma.masked_invalid(temp_trend[kw+'_y']/temp_trend[kw+'_x']).mean()
             temp_trend[kw] = temp_trend[kw+'_x'] * norm_factor
         # keep only rows NOT already in output (left-merge produced NaNs there)
         temp_trend =  temp_trend[temp_trend.isnull().any(axis=1)]
         temp_trend['isPartial'] = temp_trend['isPartial_x']
         output = pd.concat([output, temp_trend[['date', 'isPartial'] + kw_list]], axis=0)
     
     # fix the column order: date, isPartial, then the search terms
     output = output[['date', 'isPartial']+kw_list]
     
     if not isPartial_col:
         output = output.drop('isPartial', axis=1)
     output = output[output['date']>=self.from_date]
     if scale_cols:
         # the values in each column are relative to other columns
         # so we need to get the maximum value across the search columns
         max_val = float(output[kw_list].values.max())
         for col in kw_list:
             output[col] = 100.0*output[col]/max_val
     output = output.sort_values('date', ascending=self.ascending).reset_index(drop=True)
     return output
Beispiel #44
0
from pytrends.request import TrendReq


# Create one session; it is reused for all of the requests below.
# (Current pytrends versions do not require a Google login.)
pytrend = TrendReq()

# Create payload and capture API tokens. Only needed for interest_over_time(), interest_by_region() & related_queries()
pytrend.build_payload(kw_list=['pizza', 'bagel'])

# Interest Over Time
interest_over_time_df = pytrend.interest_over_time()
print(interest_over_time_df.head())

# Interest by Region
interest_by_region_df = pytrend.interest_by_region()
print(interest_by_region_df.head())

# Related Queries, returns a dictionary of dataframes
related_queries_dict = pytrend.related_queries()
print(related_queries_dict)

# Get Google Hot Trends data
trending_searches_df = pytrend.trending_searches()
print(trending_searches_df.head())

# Get Google Top Charts
# NOTE(review): top_charts(cid=..., date=YYYYMM) is a legacy signature; newer
# pytrends releases use top_charts(date=YYYY, geo=...).  Confirm the pinned
# pytrends version before relying on this call.
top_charts_df = pytrend.top_charts(cid='actors', date=201611)
print(top_charts_df.head())

# Get Google Keyword Suggestions
suggestions_dict = pytrend.suggestions(keyword='pizza')