Example #1
 def test_suggestions(self):
     pytrend = TrendReq()
     pytrend.build_payload(kw_list=['pizza', 'bagel'])
     self.assertIsNotNone(pytrend.suggestions(keyword='pizza'))
Example #2
from . import *
from app.irsystem.models.helpers import *
from app.irsystem.models.helpers import NumpyEncoder as NumpyEncoder

###### my imports
import json
import numpy as np
import random
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

import pandas as pd
from pytrends.request import TrendReq

pytrend = TrendReq()

# Get Google Hot Trends data
df = pytrend.trending_searches(pn='united_states')
trending = []
for x in df.values:
    trending.append(x[0])

project_name = "Custom Jeopardy Quiz Generation"
net_id = "Matthew Simon: mls498, Grayson Campbell: gac88, Daniel Hayon: dh488, Theo Carrel: tjc233, Carol Zhang: cz233"


@irsystem.route('/', methods=['GET'])
def search():
    query = request.args.get('search')
    if not query:
        data = []
Example #3
#!/usr/bin/python
__author__ = "Justin Stals"

import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from pytrends.request import TrendReq
pytrends = TrendReq(hl='en-US', tz=360)


def get_deltas(keyword, show_plot=False):

    kw_list = [keyword]
    pytrends.build_payload(kw_list, cat=0, timeframe='all', geo='', gprop='')
    interest_over_time = pytrends.interest_over_time()

    current = int(interest_over_time[keyword].tail(1).values[0])

    deltas = {}

    for month_range in [3, 6]:
        tminus = float(interest_over_time[keyword].tail(month_range).values[0])
        if tminus != 0:
            deltas[str(month_range) + 'm'] = float("{:.2f}".format(
                current / tminus * 100))
        else:
            deltas[str(month_range) + 'm'] = None

    for year_range in [1, 3, 5]:
        tminus = float(interest_over_time[keyword].tail(year_range *
                                                        12).values[0])
print("HEllo")
from pytrends.request import TrendReq
import pandas as pd

pytrends = TrendReq()
pytrends.build_payload(kw_list=['flu'])

# Note: make a better version w/o pytrends, using requests (see the sketch after this example)

df = pytrends.interest_by_region(resolution='CITY')
print(df.head())
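
# A hedged sketch of the "requests-only" idea noted above. It is an assumption,
# not the author's plan, and it shows trending searches rather than
# interest_by_region: Google also publishes a daily-trends RSS feed that can be
# read with requests alone (the URL and fields below are assumptions).
import requests
import xml.etree.ElementTree as ET


def trending_titles_via_requests(geo="US"):
    # Fetch the public daily trending-searches feed and return the query titles.
    resp = requests.get(
        "https://trends.google.com/trends/trendingsearches/daily/rss",
        params={"geo": geo},
        timeout=10,
    )
    resp.raise_for_status()
    root = ET.fromstring(resp.content)
    return [item.findtext("title") for item in root.iter("item")]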
Example #5
def go_to_do(proxies, input_count=1):
    # Open the file and read the input data
    input_file = "Main_Actor.csv"
    actor_data = take_actor_data(input_file)
    actor_id, actor_name = take_actor_id_name(actor_data)
    # Initial values
    a_count = 0
    global b_count
    b_count = input_count
    out_file_name = "actor_trends.json"
    # Start from the given record number (input_count)
    for actor in actor_name:
        # time.sleep(random.randint(3, 8))
        if a_count < input_count:
            a_count += 1
            continue
        # Create an instance via the API; tz is the timezone offset (360 = US)
        pytrend = TrendReq(tz=360, proxies=proxies)
        kw_list = [actor]
        # Ranges over five years come back monthly, so split the period into
        # four segments to keep weekly granularity
        timeframe = [
            "2004-01-01 2007-12-31", "2008-01-01 2011-12-31",
            "2012-01-01 2015-12-31", "2016-01-01 2019-12-31"
        ]
        out_data = []
        print("==== Starting record {} ====".format(a_count))
        t_count = 20
        for t in timeframe:
            # time.sleep(random.randint(2, 8))
            # Proxies already add latency, so no extra sleep is needed
            # Search parameters; cat=34 is the Film category. The request and
            # the scraping both happen inside this call
            pytrend.build_payload(kw_list=kw_list,
                                  cat=34,
                                  timeframe=t,
                                  geo='US',
                                  gprop='')
            # Use interest_over_time(); .get() because we only care about the keyword column
            data = pytrend.interest_over_time().get(kw_list)
            try:
                # The column is named after the keyword by default, which is awkward
                # to work with later, so rename it to "Count"
                data.rename(columns={data.columns[0]: "Count"}, inplace=True)
                # The result is a DataFrame; convert it to JSON and handle it as an array
                preload = json.loads(data.to_json(orient='table'))['data']
                for p in preload:
                    p['date'] = p['date'][0:10]  # Date field goes down to seconds; keep only the date
                    out_data.append(p)
            except:
                print("{}% done, no data, range: {}".format(t_count, t))
                t_count += 20
                continue
            print("{}% done, data found, range: {}".format(t_count, t))
            # time.sleep(random.randint(2, 5))
            t_count += 20
        # Build the output dictionary
        output = dict([("Actor_ID", actor_id[a_count]), ("Actor_name", actor),
                       ("data", out_data)])
        # Write it out as JSON
        with open(out_file_name, "a", encoding="utf-8") as out_f:
            if a_count == 1:
                out_f.write("[" + json.dumps(output) + "\n")
            else:
                out_f.write(',' + json.dumps(output) + "\n")
        print("\n== Record {} done, actor: {}, ID: {} ==".format(
            a_count, actor, actor_id[a_count]))
        a_count += 1

        b_count = a_count  # Record where to resume after a break

    with open(out_file_name, "a", encoding="utf-8") as out_f:
        out_f.write("]")
    global complete_all
    complete_all = 1
Example #6
 def __init__(self, keywords):
     self.pytrends = TrendReq(hl='uk', tz=360)
     self.pytrends.build_payload(keywords)
Example #7
from pytrends.request import TrendReq
import pandas as pd
import time

startTime = time.time()
pytrends = TrendReq(hl='en-US', tz=360, timeout=(10, 25))
df = pd.read_csv("allkeywords10.csv")
keylist = df["query"].values.tolist()

print(keylist)

dataset = []

for x in range(0, len(keylist)):
    keywords = [keylist[x]]
    pytrends.build_payload(kw_list=keywords,
                           cat=8,
                           timeframe='2020-01-01 2020-07-31',
                           geo='US')
    data = pytrends.interest_over_time()
    if not data.empty:
        data = data.drop(labels=['isPartial'], axis='columns')
        dataset.append(data)

result = pd.concat(dataset, axis=1)
result.to_csv('search_result_10.csv')
Example #8
import json
import time
import os
import datetime

import pandas as pd
from pytrends.request import TrendReq

output_folder = "./output_csv%s/" % datetime.datetime.now().strftime(
    "%Y-%m-%d")
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

pd.options.mode.chained_assignment = None  # default='warn'
timezone_offset = -330  # INDIA
# timezone_offset = -480 # Philippines

pytrends = TrendReq(hl='en-US',
                    timeout=(10, 25),
                    tz=timezone_offset,
                    retries=10,
                    backoff_factor=0.5)

date_entry = input('Enter a start date in YYYY-MM-DD format: ')
year, month, day = map(int, date_entry.split('-'))
date1 = datetime.date(year, month, day)

date_entry = input('Enter a end date in YYYY-MM-DD format: ')
year, month, day = map(int, date_entry.split('-'))
date2 = datetime.date(year, month, day)

t_frame = date1.strftime("%Y-%m-%d") + " " + date2.strftime("%Y-%m-%d")

cat_df = pd.read_csv("categories_defined.csv")
cat_names = cat_df["name"].tolist()
Example #9
def actual_prediction(key_word):
    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload([key_word],
                           cat=0,
                           timeframe='2021-01-01 2021-01-15',
                           gprop='',
                           geo='')
    df = pytrends.interest_over_time()
    std = pd.DataFrame.from_dict(df)

    std['Moving Average'] = std[key_word].rolling(2).mean()

    close = float(std.loc['2021-01-15', 'Moving Average'])
    l = []
    l.append(float(std.loc['2021-01-15', 'Moving Average']))
    l.append(float(std.loc['2021-01-13', 'Moving Average']))
    l.append(float(std.loc['2021-01-14', 'Moving Average']))
    l.append(float(std.loc['2021-01-12', 'Moving Average']))
    l.append(float(std.loc['2021-01-11', 'Moving Average']))
    l.append(float(std.loc['2021-01-10', 'Moving Average']))
    l.append(float(std.loc['2021-01-09', 'Moving Average']))

    average = 0
    count = 0
    for x in l:
        average = average + x
        count += 1

    average = average / count

    ender = (float(std.loc['2021-01-15', 'Moving Average']) / average)
    print(ender)
    # Check the larger threshold first so the 1.15 branch is actually reachable
    if ender > 1.15:
        ender = ender * 0.85
    elif ender > 1.10:
        ender = ender * 0.90
    elif ender < 1.00:
        ender = ender * 1.02

    df = yf.download(key_word, start='2021-01-01',
                     end='2021-01-16')['Adj Close']
    eat = pd.DataFrame.from_dict(df)
    fire = float(eat.loc['2021-01-15', 'Adj Close'])
    print(fire)
    fire = fire * ender
    print(fire)
    df1 = pd.DataFrame({
        "a": [
            "01-04", "01-05", "01-06", "01-07", "01-08", "01-11", "01-12",
            "01-13", "01-14", "01-15", "01-22"
        ],
        "b": [
            float(eat.loc['2021-01-04', 'Adj Close']),
            float(eat.loc['2021-01-05', 'Adj Close']),
            float(eat.loc['2021-01-06', 'Adj Close']),
            float(eat.loc['2021-01-07', 'Adj Close']),
            float(eat.loc['2021-01-08', 'Adj Close']),
            float(eat.loc['2021-01-11', 'Adj Close']),
            float(eat.loc['2021-01-12', 'Adj Close']),
            float(eat.loc['2021-01-13', 'Adj Close']),
            float(eat.loc['2021-01-14', 'Adj Close']),
            float(eat.loc['2021-01-15', 'Adj Close']), fire
        ]
    })

    sns.set()

    sns.lineplot(x=df1['a'], y=df1['b'])

    plt.title("Predicted Stock Price {}".format(key_word))
    plt.ylabel("Price")
    plt.xlabel("Date")
    plt.savefig('template/static/images/actual.png')
    plt.close()
Example #10
def get_trends(kw_list, periods):
    #print(kw_list, periods)
    pytrends = TrendReq()
    #cat 0 is all categories
    pytrends.build_payload(kw_list, cat=0, timeframe=periods)
    return pytrends.interest_over_time()
Example #11
import pandas, numpy, random, requests
import re

# Imports this snippet relies on (the surrounding file presumably provides them)
import MeCab
from pykakasi import kakasi
from pytrends.request import TrendReq

# MeCab setup
tagger_neolog = MeCab.Tagger(
    "-Ochasen -d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
tagger = MeCab.Tagger("-Ochasen")

# kakasi setup
kakasi = kakasi()
kakasi.setMode('J', 'H')  # J (Kanji) to H (Hiragana)
conv = kakasi.getConverter()

# Regex for allowed character strings
p = re.compile('[0-9a-zA-Zあ-んー\u30A1-\u30F4]+')

pytrends = TrendReq(hl='ja-JP', tz=360)
trends = pytrends.trending_searches(pn='japan')
trends = trends.values.tolist()

trendList = []
# print("----- Top 20 Google Trends terms -----")
for t in trends:
    # print(t[0])
    if p.fullmatch(t[0]):
        # The trend matches the regex
        trendList.append(t[0])
    else:
        # The trend does not match the regex
        ans = ""
        words = []
        result = tagger.parse(t[0]).split("\n")
Example #12
def get_search_data(GroupName, setting_name=1, fromwhere=1):
    d = datetime.datetime.today()
    d = d - timedelta(days=1)
    endDate = d.strftime('%Y-%m-%d')

    d = d - timedelta(days=skfWk)
    startDate = d.strftime('%Y-%m-%d')
    print(startDate)
    groupName = GroupName
    if (setting_name == 2):
        groupName += " 주가"

    if (fromwhere == 1):
        body = '{\"startDate\":\"' + startDate + '\",\"endDate\":\"' + endDate + '\",\"timeUnit\":\"date\",\"keywordGroups\":[{\"groupName\":\"' + groupName + '\",\"keywords\":["' + groupName + '"]}]}';
        # print(body)
        request = urllib.request.Request(ourl)
        request.add_header("X-Naver-Client-Id", client_id)
        request.add_header("X-Naver-Client-Secret", client_secret)
        request.add_header("Content-Type", "application/json")
        response = urllib.request.urlopen(request, data=body.encode("utf-8"))
        rescode = response.getcode()
        if (rescode == 200):
            response_body = response.read()
            ans = response_body.decode(encoding="utf-8", errors="replace")
        else:
            print("Error Code:" + rescode)
            assert 1 == 2
        dic = {}
        ans = ans.split("{")
        for i in range(len(ans)):
            ans[i] = re.split(",|:|}", ans[i])
            if (len(ans[i]) < 4): continue
            if (ans[i][0] == '"period"' and ans[i][2] == '"ratio"'):
                if (float(ans[i][3]) == 0):
                    ans[i][3] = 1
                dic[ans[i][1][1:11]] = float(ans[i][3])
        return dic
    elif (fromwhere == 2):
        pytrends = TrendReq(hl='ko', tz=540)
        keywords = [groupName]
        pytrends.build_payload(keywords, cat=0, timeframe='today 3-m', geo='KR', gprop='')
        getcompareinfo = pytrends.interest_over_time()
        dic = {}
        for i in getcompareinfo[groupName].index:
            if (getcompareinfo[groupName][i] == 0):
                dic[str(i)[:10]] = 1
            else:
                dic[str(i)[:10]] = getcompareinfo[groupName][i]
        ndic = {}
        nwday = d
        for i in dic:
            print(type(i), i)
        print("ASDASD")
        while True:
            print(d.strftime)
            ndic[d.strftime('%Y-%m-%d')] = dic[d.strftime('%Y-%m-%d')]
            d += timedelta(days=1)
            if (d.strftime('%Y-%m-%d') == endDate):
                break

        return ndic
Example #13
    ['2019', 'netflix', 'top', 'best', 'watch', '2018', 'bbc', 'new'],
    'tv show': [],
    'film': [
        '2018', 'favourite', '2019', 'camera', 'news', 'netflix', 'window',
        'times', 'cling', 'home', 'cinema', 'polaroid', 'instax', 'izle'
    ],
    'movie':
    ['box', '123', 'news', 'house', 'movies', 'cinema', 'putlocker', 'hindi']
}

kw_dic, kw_list = gtf.kw_prep(keywords, kw_exclusions)

# Connect to Google
pytrends = TrendReq(hl='en-UK',
                    tz=-60,
                    timeout=(25, 25),
                    retries=5,
                    backoff_factor=2)

##################
# MAIN DASHBOARD #
##################

# Define window for back fill

start_date = "2019-01-01"
stop_date = datetime.date.today().strftime("%Y-%m-%d")

start = datetime.datetime.strptime(start_date, "%Y-%m-%d")
stop = datetime.datetime.strptime(stop_date, "%Y-%m-%d")
Example #14
def home(request):

    regex1 = r"""
                [^a-zA-Z0-9 ] # [^] = match any character not in the set
                              # set = all characters a-z, A-Z, 0-9 and spaces
                """
    pattern1 = re.compile(regex1, re.VERBOSE)

    # Generate a list of lists of currently trending keywords with punctuation stripped.
    # Punctuation breaks the .interest_over_time() call in some cases, at least.
    # build_payload() expects a list for its kw_list parameter.

    try:
        pytrends = TrendReq(hl='en-US', tz=360)
        response = pytrends.trending_searches(pn='united_states')
        data = json.loads(response.to_json())["0"]
        keyword_list = [[pattern1.sub(" ", value)]
                        for key, value in data.items()]
        #print("keyword_list:", keyword_list)

    except Exception as e:
        keyword_list = []
        print()
        print(
            "error trending_searches: google is probably rate-limiting you: ",
            e)
        print()
        messages.warning(request,
                         "You've already hit the API: trending_searches.")

    # Get interest over time and store in single dataframe
    combined_df = pd.DataFrame()

    # alternative way to clean your database
    """
    print("Trend.objects.all():/n", Trend.objects.all())
    Trend.objects.all().delete()
    print("Trend.objects.all():/n", Trend.objects.all())
    print("Keyword.objects.all():/n", Keyword.objects.all())
    Keyword.objects.all().delete()
    print("Keyword.objects.all():/n", Keyword.objects.all())
    """

    try:
        # search for each item separately
        for term in keyword_list:
            pytrends.build_payload(kw_list=term,
                                   cat=0,
                                   timeframe='now 1-d',
                                   geo='US',
                                   gprop='')
            time.sleep(1)
            # make call
            data = pytrends.interest_over_time()
            # drop unused column
            data = data.drop(labels=['isPartial'], axis='columns')
            # convert index into str from Datetime
            new_index = data.index.astype('str')
            data = data.set_index(new_index)
            # convert dataframe to dictionary and remove keyword
            data2 = data.to_dict()[term[0]]
            #print(data2)

            if Keyword.objects.filter(name=term[0]).exists():
                # if keyword already exists, get old object. save only the trend
                print("object with name '{}' already exists in the db".format(
                    term[0]))
                # .filter() returns a list, so object will be at [0]
                keyword = Keyword.objects.filter(name=term[0])[0]
                #trend = Trend(data=str(data.to_json()), keyword=keyword)
                trend = Trend(data=json.dumps(data2), keyword=keyword)
                trend.save()
            else:
                # if keyword is new, create a new object for it. save keyword and trend
                keyword = Keyword(name=term[0])
                #trend = Trend(data=str(data.to_json()), keyword=keyword)
                trend = Trend(data=json.dumps(data2), keyword=keyword)
                keyword.save()
                trend.save()
            #print(data)
            combined_df = pd.concat([combined_df, data], axis=1, sort=False)

    except Exception as e:
        print()
        print(
            "error interest_over_time: google is probably rate-limiting you: ",
            e)
        print()
        messages.warning(request,
                         "You've already hit the API: interest_over_time.")

    # see django output in terminal for verification
    print(combined_df)
    print("Keyword.objects.all():", Keyword.objects.all())
    print("Keyword.objects.count():", Keyword.objects.count())
    print("Trend.objects.all():", Trend.objects.all())
    print("Trend.objects.count():", Trend.objects.count())
    if len(keyword_list) > 0:
        test_kw = keyword_list[0][0]
        print("Trend.objects.filter(keyword__name='{}')".format(test_kw),
              Trend.objects.filter(keyword__name=test_kw))

    all_trends = serializers.serialize("json",
                                       Trend.objects.all(),
                                       use_natural_foreign_keys=True)

    context = {
        "props": {
            "trends": json.loads(all_trends),
        },
    }

    return render(request, 'pages/home.html', context)
Example #15
# Imports this snippet relies on
import time
import investpy
import pandas as pd
from pytrends.request import TrendReq
from tqdm import tqdm

data_close = pd.DataFrame()
lista_cryptos = ['Bitcoin', 'Ethereum']
i = 0
crypto_list = investpy.crypto.get_cryptos_list()
selected_cryptos = crypto_list[0:50]
#%%
# Iterate the selected cryptos so the columns match the rename below
for crypto in tqdm(selected_cryptos):
    df = investpy.crypto.get_crypto_historical_data(crypto, '01/01/2020', '01/04/2021', as_json=False, order='ascending', interval='Daily')
    i = i + 1
    data_close[i] = df.iloc[:, 3]

# %%
df_interes_cryptos = pd.DataFrame()
trends = {}
i = 1
pytrends = TrendReq()
for cryptos in selected_cryptos:
    pytrends.build_payload(kw_list=[cryptos], geo='US', timeframe='2020-01-01 2021-01-01')
    trends[i] = pytrends.interest_over_time()
    i += 1
    time.sleep(1)
df_trends = pd.concat(trends, axis=1)
# %%
df_trends
#%%
data_close.columns = selected_cryptos
# %%
df_interes_cryptos

# %%
correlacion = pd.DataFrame()
Example #16
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 11 14:05:25 2020

@author: alfiantjandra
"""
from pytrends.request import TrendReq
import pandas as pd
import pycountry
import time

pytrend = TrendReq(hl='en-GB', tz=360)
colnames = ["keywords"]
""" list of keywords ( max. 5) """
# df2 = ['coronavirus']  # single-keyword test, superseded by the list below
df2 = [
    "loss of taste", "covid symptoms", "loss of smell", "face mask",
    "coronavirus vaccine", "covid testing"
]
""" Creating dictionary of us code. Some regions aren't included """
xd = list(pycountry.subdivisions.get(country_code='US'))
list_subdivision = []
for i in range(len(xd)):
    list_subdivision.append(xd[i].code)

outlying_area = ['US-PR', 'US-GU', 'US-AS', 'US-MP', 'US-VI', 'US-UM', 'US-AK']
for x in outlying_area:
    list_subdivision.remove(x)
''' Scrape data and format into a single dataset; output: final_result '''
for y in list_subdivision:
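    # The loop body is truncated in this listing; a hedged sketch of the step the
    # comment above describes (the parameters here are assumptions, not the original code):
    #   pytrend.build_payload(df2, geo=y)
    #   state_df = pytrend.interest_over_time()
    #   state_df['region'] = y
    #   ... collect each state_df in a list, then pd.concat() it into final_result
    #   time.sleep(1)  # pause between states to avoid rate limits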
Example #17
# Import manipulation and visualisation packages
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import yfinance as yf
import datetime
import time
import requests

# Import pytrends API
from pytrends.request import TrendReq

# Create a pytrend object. Request data from Google. hl parameter specifies host-language.
pytrends = TrendReq(hl='en-US', tz=360) #, tz=360, retries=10, backoff_factor=0.5

# Daily Trend in Germany
ts_df = pytrends.trending_searches(pn='germany')
ts_df.head()

# Extract data for Covid-related keywords
kw_list = ["lockdown", "vaccine", "unemployment", "zoom"]
pytrends.build_payload(kw_list, cat='0', timeframe='2019-10-01 2021-01-01', geo='US', gprop='')
df_covid = pytrends.interest_over_time()#.drop(['isPartial'], axis = 1)
df_covid.head()

# Plot data
df_covid.plot()
plt.ylabel("relative weekly searches")
plt.savefig('covid_searches.png')
Example #18
device_token = ""
labels = ['Eniram', 'Tesla']
values = {}


def send_notification(label):
    payload = Payload(alert="%s interest changed!" % label,
                      sound="default",
                      badge=1,
                      mutable_content=True)
    apns.gateway_server.send_notification(device_token, payload)


def update_interests():
    for label in labels:
        pytrends.build_payload(kw_list=[label], timeframe='today 5-y')
        interests = pytrends.interest_over_time()
        current_interest = interests[label][-1]
        if label not in values or values[label] != current_interest:
            values[label] = current_interest
            send_notification(label)
            print("Interest for %s updated to %d" % (label, current_interest))


pytrends = TrendReq(google_email, google_pass)
schedule.every().hour.do(update_interests)
update_interests()
while True:
    schedule.run_pending()
    time.sleep(1)
Example #19
import numpy as np
import pandas as pd
from datetime import datetime
from pytrends.request import TrendReq

PD_MAX_ROWS = 500
PD_MAX_COLUMNS = 5100
PD_CONSOLE_WIDTH = 2000
PD_MAX_COLWIDTH = 1000

pd.options.display.max_rows = PD_MAX_ROWS
pd.options.display.max_columns = PD_MAX_COLUMNS
pd.options.display.width = PD_CONSOLE_WIDTH
pd.options.display.max_colwidth = PD_MAX_COLWIDTH

SEARCH_VALUE = 'pornhub'

pytrends = TrendReq(hl='en-US', geo='ZA')

START_DATE = pd.to_datetime('2016-01-01')
END_DATE = pd.to_datetime('2021-05-16')

full_date_range = pd.DataFrame(
    {'date_index': pd.date_range(START_DATE, END_DATE)})

# Note: hourly data is in UTC - the same as the weekly data
# The hours need adjusting after applying adjustments based on the weekly data
results_hourly_by_week = pytrends.get_historical_interest(
    ['pornhub'],
    year_start=START_DATE.year,
    month_start=START_DATE.month,
    day_start=START_DATE.day,
    hour_start=0,
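
# A hedged sketch of the UTC adjustment mentioned above (the original call is cut
# off in this listing): localize the returned hourly index to UTC and convert it
# to a local zone. The zone below is an assumption chosen to match geo='ZA'.
def to_local_time(df, zone="Africa/Johannesburg"):
    out = df.copy()
    out.index = out.index.tz_localize("UTC").tz_convert(zone)
    return out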
Example #20
#import jinja
import pandas as pd
from flask import Flask, render_template, request, redirect, send_file, jsonify
from pytrends.request import TrendReq

app = Flask(__name__)

app.config['DEBUG'] = True

pytrends = TrendReq(hl='en-GB', tz=212)


@app.route('/interest-over-time', methods=['GET', 'POST'])
def process_keywords():
    if request.method == 'POST':
        input = request.form

        start_date = input["start-date"]
        end_date = input["end-date"]
        timeframe = start_date + " " + end_date

        print(timeframe)

        keyword_list = input["keywords"].replace('\r',
                                                 '').replace('\n',
                                                             '').split(",")
        print(input["keywords"])
        print(keyword_list)
        name_of_file = keyword_list[0] + keyword_list[-1] + ".csv"

        payload_df = pytrend_payload(keyword_list, name_of_file, timeframe)
Example #21
    def run(
        self,
        project_name: str,
        start_date: datetime.date,
        end_date: datetime.date,
        keywords: list,
        gd_folder_id: str,
        output_dir_path: str,
    ) -> bool:
        """execute."""
        # output file
        utf8_output_file_name = '%s_utf8.csv' % (project_name)
        sjis_output_file_name = '%s.csv' % (project_name)
        now = datetime.now().strftime('%Y%m%d%H%M%S')

        # term
        term = str(start_date) + ' ' + str(end_date)

        # pytrend
        pytrend = TrendReq(tz=-540)

        # google drive
        gauth = GoogleAuth()
        gauth.CommandLineAuth()
        drive = GoogleDrive(gauth)

        # make directory
        target_folder_id = self.__search_folder(
            drive,
            gd_folder_id,
            now,
        )
        if target_folder_id == '':
            f = drive.CreateFile({
                'title':
                now,
                'mimeType':
                'application/vnd.google-apps.folder',
                'parents': [{
                    'kind': 'drive#fileLink',
                    'id': gd_folder_id,
                }],
            })
            f.Upload()

            # get folder_id
            target_folder_id = self.__search_folder(
                drive,
                gd_folder_id,
                now,
            )

            if target_folder_id == '':
                raise SystemError('can\'t make directory.')

        df = ''
        for keyword in keywords:
            # get data from google trend
            pytrend.build_payload(kw_list=[keyword], geo='JP', timeframe=term)
            df_part = pytrend.interest_over_time()[keyword]

            # merge dataframe
            if isinstance(df, pd.core.series.Series) or isinstance(
                    df, pd.core.frame.DataFrame):
                df = pd.concat([df, df_part], axis=1)
            else:
                df = df_part

        # rename index
        indexes = df.index
        new_indexes = []
        for i, v in enumerate(indexes):
            j = i + 1
            try:
                w = indexes[j] - timedelta(1)
            except IndexError:
                w = v + timedelta(6)
            new_index = v.strftime('%Y-%m-%d') \
                + ' - ' \
                + w.strftime('%Y-%m-%d')
            new_indexes.append(new_index)

        df.index = new_indexes

        # set file path
        utf8_output_file_path = output_dir_path + utf8_output_file_name
        sjis_output_file_path = output_dir_path + sjis_output_file_name

        # write to csv
        df.to_csv(utf8_output_file_path, index=True, header=True)
        # encode
        with open(sjis_output_file_path, 'w', encoding='cp932') as f_out:
            with open(utf8_output_file_path, 'r', encoding='utf-8') as f_in:
                f_out.write(f_in.read())

        # send to google drive
        f = drive.CreateFile({
            'title':
            sjis_output_file_name,
            'mimeType':
            'text/csv',
            'parents': [{
                'kind': 'drive#fileLink',
                'id': target_folder_id,
            }],
        })
        f.SetContentFile(sjis_output_file_path)
        f.Upload()

        return True
Example #22
# - Suggestions : returns a list of additional suggested keywords that can be used to refine a trend search.

# # US Top 10 Actors (Male) Ranking Test

from pytrends.request import TrendReq
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import statsmodels.api as sm
import seaborn as sns
import matplotlib
# %matplotlib inline

# API connection
# Connect to the API, specifying the interface language and timezone
pytrends = TrendReq(hl="en-US", tz=360)

# US Top 10 Actors
# Define tag_list
name_list_1 = ["Tom Hanks","Christian Bale","Leonardo DiCaprio","Morgan Freeman"]
name_list_2 = ["Robert De Niro","Anthony Hopkins","Denzel Washington","Robert Downey Jr."]
name_list_3 = ["Johnny Depp","Al Pacino","Daniel Day-Lewis","Kevin Spacey"]

# +
pytrends.build_payload(name_list_1, timeframe='2014-01-01 2019-04-15', geo='US')
df1 = pytrends.interest_over_time()

pytrends.build_payload(name_list_2, timeframe='2014-01-01 2019-04-15', geo='US')
df2 = pytrends.interest_over_time()

pytrends.build_payload(name_list_3, timeframe='2014-01-01 2019-04-15', geo='US')
Example #23
from pytrends.request import TrendReq
import matplotlib.pyplot as plt
import os

# Search keyword and period
keyword = "Galaxy Fold"
period = "today 3-m"  # search period: last 3 months

# Connect to Google Trends
trend_obj = TrendReq()
trend_obj.build_payload(kw_list=[keyword], timeframe=period)

# Trend change over time
trend_df = trend_obj.interest_over_time()
print(trend_df.head())

# Plot the graph
plt.style.use("ggplot")
plt.figure(figsize=(14, 5))
trend_df[keyword].plot()
plt.title("Google Trends over time", size=15)
plt.legend(labels=[keyword], loc="upper right")

# Save the graph to a file
cwd = os.getcwd()
output_filepath = os.path.join(cwd, "output",
                               "google_trend_over_time_%s.png" % keyword)
plt.savefig(output_filepath, dpi=300)
plt.show()
Example #24
    # Then merge the two dataframes (trend data and a flag of whether it was overlapped)
    df = pd.concat([df, ol], axis=1)
    df.columns = [keyword, 'overlap']

    # Send back only the df of our requested date range
    df = df[start_d:init_end_d]

    # Re-normalize so the overall maximum is 100 search interest
    df[keyword] = (100 * df[keyword] / df[keyword].max()).round(decimals=0)

    return df


if __name__ == "__main__":
    pytrend = TrendReq(hl='en-US')
    keyword = 'iphone'
    start = '2019-01-01'
    end = '2020-10-15'
    geo = 'US'
    cat = 0
    gprop = ''

    overlapping = get_daily_trend(pytrend,
                                  keyword,
                                  start,
                                  end,
                                  geo=geo,
                                  cat=cat,
                                  gprop=gprop,
                                  verbose=True)
Example #25

# In[16]:


#Get trends for paths

import sys
import time
from pytrends.request import TrendReq
from tqdm import tqdm


pytrends = TrendReq(
    retries=5,
)

paths_related_queries = []

size = end - beginning

with tqdm(total = size, file=sys.stdout) as pbar:
    
    iteration = beginning
    
    while iteration<end:
        
        paths = cleaned_splitted_paths[iteration][0]
        # print(paths)
        
Example #26
        texto = str(resumen)
        traduccion = (bing(texto, dst='en'))
        analisis = TextBlob(traduccion)
        factor = ((analisis.polarity * 5) + 5) * (1 - analisis.subjectivity)
        lista.append(factor)
    except:
        pass
print(
    "\nWhat Google News thinks about %s (10 being the best and 1 the worst) is %s"
    % (z, sum(lista) / len(lista)))

google_username = ""
google_password = ""
path = ""

pytrend = TrendReq(google_username, google_password, custom_useragent='')
pytrend.build_payload(kw_list=[z])

interest_over_time_df = pytrend.interest_over_time()
thirty = 30
sixty = 60
ninety = 90
oneeighty = 180
ten = 10

yesterday = interest_over_time_df.tail(ten)
last_30 = interest_over_time_df.tail(thirty)
last_60 = interest_over_time_df.tail(sixty)
last_90 = interest_over_time_df.tail(ninety)
last_180 = interest_over_time_df.tail(oneeighty)
Example #27
 def googleTrends(self):
     # Create file and path
     gname = self.runPath + 'googleTrends.run'
     glog = self.logPath + 'googleTrends.log'
     # Test if already running
     if os.path.exists(gname):
         return
     # Write lock file
     with open(gname, 'w') as file:
         file.write(str(datetime.now()))
     # Get list of data files
     gCfgs = self.listCfgFiles('senttrend')
     for gfile in gCfgs:
         gConf = self.readCfgFile('senttrend', gfile)
         if gConf['enabled']:
             # Log starting point of backtest
             with open(glog, 'a') as file:
                 file.write(
                     str(datetime.now()) + " -- Trending of " +
                     gConf['keyword'] + " started...\n")
             # Create ID
             # id = gConf['keyword'].replace(' ','_').lower()+ '_' + gConf['period'] + '_' + gConf['cat']
             # Create period name
             if gConf['period'] == '4h':
                 per = 'now 4-H'
             if gConf['period'] == '1D':
                 per = 'now 1-d'
             if gConf['period'] == '1W':
                 per = 'now 7-d'
             # Initialize pyTrend
             pytrend = TrendReq(hl='en-US', tz=0, timeout=(10, 25))
             # Get trends
             pytrend.build_payload([gConf['keyword']],
                                   cat=gConf['cat'],
                                   timeframe=per,
                                   geo=gConf['geo'],
                                   gprop=gConf['type'])
             # Create Dataframe
             tdf = pytrend.interest_over_time()
             # Insert into database
             tinms = int(round(time.time() * 1000))
             lastval = tdf[gConf['keyword']].tail(1)[0]
             sqlinstrend = 'INSERT into trend_' + gConf[
                 'id'] + ' VALUES (' + str(tinms) + ',' + str(lastval) + ')'
             try:
                 self.db.session.execute(sqlinstrend)
             except BaseException:
                 # Create Database table class
                 table_creation_sql = 'CREATE TABLE IF NOT EXISTS trend_' + gConf[
                     'id'] + ' (Date BIGINT NOT NULL, PercVal INT, PRIMARY KEY(Date))'
                 # Create SQL table
                 self.db.session.execute(table_creation_sql)
                 self.db.session.commit()
                 self.db.session.execute(sqlinstrend)
             self.db.session.commit()
             # Echo df
             # self.ll(tdf.tail(1))
             # self.ll(tdf[gConf['keyword']].tail(1)[0])
     # Remove File Lock
     os.remove(gname)
Example #28
# Static Vars
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
FILENAME = SCRIPT_DIR + "/google-trends-wordcloud.png"
GUSERNAME = os.environ.get('GUSERNAME')
GPASS = os.environ.get('GPASS')

USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36'
KEYWORD = 'Bitcoin'
TRENDS_TF = "now 7-d"
EXTRA_STOPWORDS = ['physique', 'calculator', 'vs', 'free', 'cuban', 'mark']

########################################################################

# Make Trends Request
logger.info('Setting google trend object')
pytrend = TrendReq(GUSERNAME, GPASS, custom_useragent=USER_AGENT)

logger.info('Building google trend payload')
pytrend.build_payload(kw_list=[KEYWORD], timeframe=TRENDS_TF)

logger.info('Getting related queries')
related = pytrend.related_queries()

text = re.sub("\\n[0-9]+", '', related[KEYWORD]['rising']['query'].to_string())
text = re.sub("[0-9]+", '', text)

logger.info('Words')
print(text)

#####################
# Make Word Cloud
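
# The word-cloud step itself is cut off in this listing; a hedged sketch using the
# `wordcloud` package (the package choice and options are assumptions, not the
# original code). It reuses `text`, EXTRA_STOPWORDS and FILENAME from above.
from wordcloud import WordCloud, STOPWORDS

cloud_stopwords = STOPWORDS.union(EXTRA_STOPWORDS)
cloud = WordCloud(stopwords=cloud_stopwords, width=800, height=400).generate(text)
cloud.to_file(FILENAME)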
Example #29
 def _fetch(self, pn: str) -> pd.DataFrame:
     pytrend = TrendReq(hl='ja-jp', tz=540)
     return pytrend.trending_searches(pn=pn)
Example #30
 def test_top_charts(self):
     pytrend = TrendReq()
     pytrend.build_payload(kw_list=['pizza', 'bagel'])
     self.assertIsNotNone(pytrend.top_charts(cid='actors', date=201611))