Example #1
def tracking_in_time_keywords(kw_list):
    # hl and tz are placeholders: set your own locale and timezone offset (e.g. hl='en-US', tz=360)
    pytrends = TrendReq(hl='en-US', tz=360)
    future_dataframe={}
    c=1
    for i in range(len(kw_list)):
        if i%2==0:
        
            try:
                print("Requesting ",str(kw_list[i]))
                # geo is a placeholder: use your own country code (e.g. 'US' or 'ES')
                pytrends.build_payload(kw_list[i], cat=kw_list[i+1], timeframe=dates, geo='US', gprop='')
                
                future_dataframe[c]=pytrends.interest_over_time() 
                future_dataframe[c].drop(['isPartial'], axis=1,inplace=True)
                c+=1
                result = pd.concat(future_dataframe, axis=1)

                # this is for intense use of the script, remove it to avoid Cloud Function timeout (and save money)
                secs = random.randrange(10, 50)
                print("Sleeping {} seconds before the next request".format(secs))
                time.sleep(secs)
                print("Done")

            except Exception:
                print("***", "\n", "Error with ", kw_list[i], "or not enough trend data", "\n", "***")
    
    result.columns = result.columns.droplevel(0)
    df1=result.unstack(level=-1)
    df2=pd.DataFrame(df1)
    df2.reset_index(inplace=True)
    df2.columns = ["keyword","date","trend_index"]
    return df2
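The function above expects a module-level timeframe named dates and a kw_list that alternates keyword lists with category ids (note the kw_list[i] / kw_list[i+1] indexing). A minimal, hypothetical driver for it could look like the following; the timeframe and keyword/category values are illustrative, not taken from the original script.

# hypothetical driver for tracking_in_time_keywords(); values are illustrative only
import random
import time

import pandas as pd
from pytrends.request import TrendReq

dates = 'today 12-m'                      # timeframe referenced inside the function
kw_list = [["zoom", "teams"], 0,          # even index: keyword list, odd index: category id
           ["netflix", "hbo"], 0]

df = tracking_in_time_keywords(kw_list)
df.to_csv('trends_long_format.csv', index=False)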
Example #2
    def queryGTrends(self, kw_list):
        google_username = "******"
        google_password = "******"
        pytrends = TrendReq(hl='en-US')

        pytrends.build_payload(kw_list, timeframe='all')
        #pytrends.build_payload(kw_list)

        # Interest over time
        time = pytrends.interest_over_time()
        #time.mean()
        #print(time.mean())

        # Related Queries, returns a dictionary of dataframes
        #related_queries_dict = pytrends.related_queries()
        #print(related_queries_dict)

        #related_topics_dict = pytrends.related_topics()
        #print(related_topics_dict)

        # Get Google Hot Trends data
        # trending_searches_df = pytrends.trending_searches()
        # print(trending_searches_df.head())

        # Get Google Keyword Suggestions
        #suggestions_dict = pytrends.suggestions(keyword='GIS')
        # print(suggestions_dict)
        return time  #.mean()
Example #3
    def retrieve_google_trends(self, search, date_range):

        # Set up the trend fetching object
        pytrends = TrendReq(hl='en-US', tz=360)
        kw_list = [search]

        try:

            # Create the search object
            pytrends.build_payload(kw_list,
                                   cat=0,
                                   timeframe=date_range[0],
                                   geo='',
                                   gprop='news')

            # Retrieve the interest over time
            trends = pytrends.interest_over_time()

            related_queries = pytrends.related_queries()

        except Exception as e:
            print('\nGoogle Search Trend retrieval failed.')
            print(e)
            return

        return trends, related_queries
Example #4
def get_requirements():
    skill = request.args.get('skill')
    if (skill is None):
        abort(400)

    # Note: older pytrends builds took Google account credentials like this;
    # recent versions only need TrendReq(hl='en-US', tz=360).
    pytrends = TrendReq("*****@*****.**",
                        "qwertyqwerty",
                        hl='en-US',
                        tz=360,
                        custom_useragent=None)

    pytrends.build_payload([skill],
                           cat=0,
                           timeframe='today 5-y',
                           geo='',
                           gprop='')

    interest_over_time_df = pytrends.interest_over_time()

    resp = make_response(
        interest_over_time_df.to_json(path_or_buf=None,
                                      orient=None,
                                      date_format='epoch',
                                      double_precision=10,
                                      force_ascii=True,
                                      date_unit='ms',
                                      default_handler=None,
                                      lines=False))

    resp.headers['Access-Control-Allow-Origin'] = "*"

    return resp
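Several of the examples that follow (#5, #6, #9, #13, #14, #18, #21 and #27) call a shared module-level pytrends client, plotting libraries, and in some cases module-level containers that are created outside the functions shown. A minimal setup they appear to assume is sketched below; the names are taken from the snippets themselves, so treat it as an assumption rather than the original authors' code.

# assumed module-level setup for the examples that do not construct their own client
import csv

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from pytrends.request import TrendReq

pytrends = TrendReq(hl='en-US', tz=360)   # shared client used by the functions below
dict_relate = {}                          # cache used by get_topk_related (Example #9)
city_mean_list = []                       # accumulator used by pull_trends (Example #12)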
Example #5
def interest_related_topics(key, date):
    kw = [key]
    pytrends.build_payload(kw, cat=0, timeframe=date, geo='', gprop='')
    related = pytrends.related_topics()[key]['top']
    kw_list = []
    for i in related['topic_title']:
        if i not in kw_list:
            kw_list.append(i)
        if len(kw_list) == 5:
            break

    pytrends.build_payload(kw_list, cat=0, timeframe=date, geo='', gprop='')
    interest = pytrends.interest_over_time().reset_index()

    fig, ax = plt.subplots(figsize=(8,6))
    for kw in kw_list:
        sns.lineplot(x="date", y=kw, label=kw, data=interest, ax=ax)

    plt.axhline(y=0, color='#414141', linewidth=1.5, alpha=.5)
    ax.set_yticks([0, 25, 50, 75, 100])
    plt.title('Interest Over Time of Related Topics',fontsize=25, weight = 'bold', fontfamily='sans-serif')
    plt.xticks(rotation=45, ha='right')
    plt.ylabel('Interest', fontsize = 15)
    plt.xlabel("")
    plt.legend(frameon=False,bbox_to_anchor=(1, 0.8) )
    plt.savefig("image/google_topics.png",bbox_inches='tight',dpi=100)
    plt.close()
Example #6
def scrape_word(text, initial=False):

    kw_list = [text]
    pytrends.build_payload(kw_list=kw_list,
                           cat=0,
                           timeframe='today 5-y',
                           geo='GB')

    # Interest Over Time
    interest_over_time_df = pytrends.interest_over_time()
    print(interest_over_time_df)
    flattened = interest_over_time_df[[text]].values.tolist()

    interest_flattened = [text]

    for i in range(len(flattened)):
        interest_flattened.append(flattened[i][0])

    print(interest_flattened)

    # overwrite the file on the first call, append on later calls
    mode = 'w' if initial else 'a'
    with open('trends.csv', mode, newline='') as result_file:
        wr = csv.writer(result_file, dialect='excel')
        wr.writerow(interest_flattened)
Example #7
def create_google_trends_df(data_frame, search):
    """Create a DataFrame with google trends for a search.""" 
    # create data_range
    date_range = \
        [f'{get_df_start_date(data_frame)} {get_df_end_date(data_frame)}']
    
    # Set up the trend fetching object
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = [search]

    try:
        # Create the search object
        pytrends.build_payload(kw_list, cat=0, timeframe=date_range[0], geo='',
            gprop='news')
        
        # Retrieve the interest over time
        trends = pytrends.interest_over_time()
        #trends = pd.DataFrame(trends_series, search)

    except Exception as e:
        print('\nGoogle Search Trend retrieval failed.')
        print(e)
        return

    # Upsample the data to daily
    trends = trends.resample('D').mean()
    # add column indicating how long since trend updated
    trends = create_days_since_valid_value(trends, search, 'Days since updated')
    # clean up na values from upsample
    trends = trends.ffill()
    
    return trends
Example #8
    def retrieve_google_trends(self, search, date_range):

        # Set up the trend fetching object
        pytrends = TrendReq(hl='en-US', tz=360)
        kw_list = [search]

        try:

            # Create the search object
            pytrends.build_payload(kw_list,
                                   cat=0,
                                   timeframe=date_range[0],
                                   geo='',
                                   gprop='news')

            # Retrieve the interest over time
            trends = pytrends.interest_over_time()

            related_queries = pytrends.related_queries()

        except Exception as e:
            print('\nGoogle Search Trend retrieval failed.')
            print(e)
            return

        return trends, related_queries
Example #9
def get_topk_related(keyword, k=3):

    if keyword in dict_relate:
        return dict_relate[keyword]
    pytrends.build_payload([keyword], cat=0, timeframe='today 1-m', geo='', gprop='')
    ret = list(pytrends.related_topics().values())[0]
    ret = set(ret.title[0:k].values) if ret is not None else None
    dict_relate[keyword] = ret
    return ret
Example #10
def hello_world():
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = ["Blockchain"]
    pytrends.build_payload(kw_list,
                           cat=0,
                           timeframe='today 5-y',
                           geo='',
                           gprop='')
    a = pytrends.interest_over_time()
    return str(a)
Example #11
def getPayload(searchterm, startdate, enddate):
    print("Finding data for " + searchterm + " between " + startdate + " and " + enddate + "...")
    # set up connection to Google Trends
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = [searchterm]
    timeFrame = startdate + ' ' + enddate
    # build payload; cat always 0, geo and gprop empty for now
    pytrends.build_payload(kw_list, cat=0, timeframe=timeFrame, geo='', gprop='')

    return pytrends.interest_over_time().to_string()
Example #12
def pull_trends(kw_list):
    pytrends = TrendReq(hl='en-US', tz=360)
    print(kw_list)

    ####Code to get averages
    #try:
    pytrends.build_payload(kw_list, cat=0, timeframe='now 1-H', geo='US', gprop='')
    y = pytrends.get_historical_interest(kw_list, year_start=2018, month_start=8, day_start=1, hour_start=0, year_end=2018, month_end=10, day_end=10, hour_end=0, cat=0, geo='US', gprop='', sleep=0)
    print(y)
    for each in kw_list:
        mu = y[str(each)].mean()
        city_mean_list.append(each)
        city_mean_list.append(mu)
Example #13
def interest_by_country(key, date):
    kw_list = [key]
    pytrends.build_payload(kw_list, cat=0, timeframe=date, geo='', gprop='')
    interest_country = pytrends.interest_by_region(resolution='COUNTRY', inc_low_vol=False, inc_geo_code=True)
    interest_country = interest_country.sort_values(key, ascending=False)
    interest_country = interest_country.iloc[:20, :].reset_index()
    fig, ax = plt.subplots(figsize=(10,6))
    plt.axhline(y=0, color='#414141', linewidth=1.5, alpha=.5)
    ax.set_yticks([0, 25, 50, 75, 100])

    ax = sns.barplot(x="geoName", y=kw_list[0], data=interest_country, palette='mako')
    plt.xticks(rotation=45, ha='right')
    plt.title('Search Interest By Country (Top 20)',fontsize=22, weight = 'bold', fontfamily='sans-serif')
    plt.xlabel("")
    plt.savefig("image/google_country.png",bbox_inches='tight',dpi=100)
    plt.close()
Example #14
def wordcloud_queries(key,date):
    pytrends.build_payload([key], cat=0, timeframe=date, geo='', gprop='')
    query = pytrends.related_queries()
    if query[key]['top'] is not None:
        input_top = list(query[key]['top']['query'])
        input_rising = list(query[key]['rising']['query'])
        def plot_wordcloud(output):
            text = " ".join(output)
            wordcloud = WordCloud(max_font_size=70, max_words=1000, colormap="Blues", background_color="white").generate(text)
            plt.figure(figsize=(8,6))
            plt.title("WordCloud of Top 25 Related Queries",fontsize=20, weight = 'bold', fontfamily='sans-serif')
            plt.imshow(wordcloud, interpolation="bilinear")
            plt.axis("off")
            plt.savefig("image/{}_google_wordcloud.png".format(key),bbox_inches='tight',dpi=100)
            plt.close()
        plot_wordcloud(input_top)
Example #15
def GenerateTrends():
    print("Fetching Google Trends...", end="", flush=True)

    dbConfig = loadConfig(
        r'C:\AppCredentials\CoinTrackerPython\database.config')

    con = pyodbc.connect(dbConfig[0]["sql_conn"])
    cursor = con.cursor()

    cursor.execute(
        "select name, id, symbol from CoinTracker.dbo.Market where id in (select coin_fk from CoinTracker.dbo.MarketHistory) and rank = 1 order by id"
    )
    rows = cursor.fetchall()

    if not rows:
        print("No Price sources found")
        return

    pytrends = TrendReq(hl='en-US', tz=360)

    for row in rows:
        kw_list = [str(row[0]), str(row[1])]
        pytrends.build_payload(kw_list)
        interest_over_time_df = pytrends.interest_over_time()
        print(interest_over_time_df)
        '''
		

		interest_by_region_df = pytrend.interest_by_region()

		print(interest_by_region_df.head())

		related_queries_dict = pytrend.related_queries()

		print(related_queries_dict)
		trending_searches_df = pytrend.trending_searches()

		print(trending_searches_df.head())

		top_charts_df = pytrend.top_charts(cid='actors', date=201611)

		print(top_charts_df.head())

		suggestions_dict = pytrend.suggestions(keyword='pizza')

		print(suggestions_dict)'''
    print("Done")
Example #16
def google_trends_five_years(inventory='strawberries'):
    """
    Google trends API scripts to collect daily google trends for the past five years on INVENTORY
    in the Bay Area (geo='US-CA-807')

    Result is saved to a csv file. 
    """

    try:
        pytrends = TrendReq(hl='en-US', tz=360)
        csv_path = os.path.join(os.getcwd(), 'backend', 'data')
        keyword = inventory
        start = '12/01/2012'
        end = '01/01/2018'
        past_five_years = pd.date_range(start=start, end=end, freq='M')
        past_five_years[0].date() + timedelta(days=1)

        for i in range(len(past_five_years) - 1):
            start_time = past_five_years[i].date() + timedelta(days=1)
            end_time = past_five_years[i + 1].date()
            time_frame = str(start_time) + " " + str(end_time)

            # call Google Trends API
            pytrends.build_payload([inventory],
                                   cat=0,
                                   timeframe=time_frame,
                                   geo='US-CA-807',
                                   gprop='')
            five_year_trend_temp = pytrends.interest_over_time()

            if i == 0:
                five_year_trend = five_year_trend_temp[keyword].to_frame(
                ).reset_index(level=['date'])
            else:
                five_year_trend = five_year_trend.append(
                    five_year_trend_temp[keyword].to_frame().reset_index(
                        level=['date']))

        # save it to csv file under "../appetite/backend/data" directory
        file_name = inventory + "_google_trends_five_years.csv"
        five_year_trend.to_csv(os.path.join(csv_path, file_name))

        return "Finished collecting five-year Google Trends data"
    except Exception as e:
        return "Exception raised: {}".format(e)
Example #17
    def pop2(lists):
        from pytrends.request import TrendReq

        # Create pytrends object, request data from Google Trends
        pytrends = TrendReq(hl='en-US', tz=360)

        # Extracts data based on our keywords
        kw_list = lists
        pytrends.build_payload(kw_list,
                               cat=0,
                               timeframe='today 5-y',
                               geo='',
                               gprop='')

        # Specify, get, and normalize data
        data = pytrends.interest_over_time()
        data.drop('isPartial', axis=1, inplace=True)
        normData = data.apply(lambda x: x / x.max(), axis=0)

        # Max normalized value from most recent date + index in list
        recent = normData.values[-1].tolist()
        max_value = max(recent)
        max_index = recent.index(max_value)

        # Name of most popular normalized item
        out = kw_list[max_index]
        pytrends.build_payload(kw_list=[out],
                               cat=0,
                               timeframe='today 5-y',
                               geo='',
                               gprop='')
        trend = pytrends.related_queries()
        trend_list = trend[out]['top']['query']

        # Adds variable number of hashtags to topList
        numHashtags = 5
        topList = []
        for i in range(numHashtags):
            topList.append(trend_list[i])

        # TopList is a list of hashtags, out is most popular item

        poplist = out
        return (poplist)
Example #18
def interest_over_time(key, date):
    kw = [key]
    pytrends.build_payload(kw, cat=0, timeframe=date, geo='', gprop='')
    interest = pytrends.interest_over_time().reset_index()

    fig, ax = plt.subplots(figsize=(8,6))

    axs1 = sns.lineplot(x="date", y=kw[0], data=interest, ax=ax)

    plt.axhline(y=0, color='#414141', linewidth=1.5, alpha=.5)
    ax.set_yticks([0, 25, 50, 75, 100])

    plt.title('"{}" Google Interest Over Time   '.format(key),fontsize=25, weight = 'bold', fontfamily='sans-serif')
    plt.xticks(rotation=45, ha='right')
    plt.ylabel('Interest', fontsize = 15)
    plt.xlabel("")
    ax.text(0.5, 0.6, key, horizontalalignment='center',verticalalignment='center', transform=ax.transAxes)
    plt.savefig("image/google_freq.png",bbox_inches='tight',dpi=100)
    plt.close()
Example #19
def get_trend_data(keyword):
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = [keyword]
    pytrends.build_payload(kw_list,
                           cat=0,
                           timeframe='today 5-y',
                           geo='',
                           gprop='')
    return pytrends.interest_over_time()
Example #20
def player_trends(list_of_players):
    pytrends = TrendReq(hl="en-US", tz=360)
    mean_list = []
    list_player_list = []
    play_list = get_player_list()
    # Google Trends accepts at most five keywords per payload, so request in chunks of 5
    for i in range(0, 100, 5):
        list_player_list.append(play_list[i:i + 5])
    for kw_list in list_player_list:
        pytrends.build_payload(kw_list,
                               cat=1077,
                               timeframe='today 5-y',
                               geo='',
                               gprop='')
        a = pytrends.interest_over_time()
        for player in kw_list:
            mean_list.append(a[player].mean())
    tup_list = zip(play_list, mean_list)
    return list(tup_list)
Example #21
def pull_trend_avg(kw_list):
    #print("pulling data")
    mean_list = []
    try:
        pytrends.build_payload(kw_list, cat=0, timeframe='now 1-H', geo='US', gprop='')
        z = pytrends.interest_over_time()

        for each in kw_list:
            mu = z[str(each)].mean()
            ratio = get_ratio(each, mu)
            mean_list.append([each, ratio])

        return mean_list

    except Exception:
        print("Error retrieving trend data for", kw_list)
        for each in kw_list:
            mean_list.append([each, 0])

        return mean_list
Example #22
    def retrieve_google_trends(self, term, date_range):
        # Set up the trend fetching object
        pytrends = TrendReq(hl='en-US', tz=360)
        kw_list = [term]
        self.term = term
        try:
            # Create the search object
            pytrends.build_payload(kw_list, cat=0, timeframe=date_range, geo='', gprop='')

            # Retrieve the interest over time
            trends = pytrends.interest_over_time()
        except Exception as e:
            print('\nGoogle Search Trend retrieval failed.')
            print(e)
            return

        # Upsample to daily, rename the columns, and interpolate the gaps
        trends = trends.resample('D').mean()
        trends = trends.reset_index(level=0)
        trends = trends.rename(columns={'date': 'ds', term: 'freq'})
        trends['freq'] = trends['freq'].interpolate()

        return trends
Example #23
def google_trends(query):

    # Set up the trend fetching object
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = [query]

    # Create the search object
    pytrends.build_payload(kw_list, cat=0, geo='', gprop='news')

    # Get the interest over time
    interest = pytrends.interest_over_time()
    print(interest.head())

    # Get related searches
    related_queries = pytrends.related_queries()
    print(related_queries)

    #       # Get Google Top Charts
    # top_charts_df = pytrend.top_charts(cid='actors', date=201611)
    # print(top_charts_df.head())

    return interest, related_queries
Example #24
    def retrieve_google_trends(self, search, date_range):
        
        # Set up the trend fetching object
        pytrends = TrendReq(hl='en-US', tz=360)
        kw_list = [search]

        try:
        
            # Create the search object
            pytrends.build_payload(kw_list, cat=0, timeframe=date_range[0], geo='', gprop='news')
            
            # Retrieve the interest over time
            trends = pytrends.interest_over_time()

            related_queries = pytrends.related_queries()

        except Exception as e:
            print('\nGoogle Search Trend retrieval failed.')
            print(e)
            return
        
        return trends, related_queries
Example #25
def fetch():
    if request.method == 'GET':
        data = request.args.get('name')
        pytrends = TrendReq(hl='en-US', tz=360)
        # Preparing KEYWORD List
        kw_list = [data]
        # Build Payload
        pytrends.build_payload(kw_list,
                               cat=0,
                               timeframe='today 12-m',
                               geo='US',
                               gprop='')
        # Get result for interest_over_time
        #return str(pytrends.interest_over_time())
        df = pytrends.interest_over_time()
        df.to_json(r'data.json')
        with open('data.json') as f:
            d = json.load(f)
            print(d)
        return render_template('simple.html',
                               tables=[df.to_html(classes='data')],
                               titles=df.columns.values)
Example #26
def GetG_trends_data():
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = ["bitcoin", "ethereum"]
    pytrends.build_payload(kw_list, timeframe='today 5-y', geo='US')
    df = pytrends.interest_over_time().tail(1)
    current_google_trends_datapoint = int(df.iat[0, 0])
    ethereum_datapoint = str(int(df.iat[0, 1]))
    print("Gtrends datapoint (ETH): " + str(ethereum_datapoint) +
          " (previous ATH was 9)")
    # zone classification on Google's 0-100 interest index
    dangerzone = current_google_trends_datapoint >= 75

    if dangerzone:
        if current_google_trends_datapoint < 85:
            print("Gtrends datapoint (BTC): " +
                  str(current_google_trends_datapoint) +
                  ", indicating that we are in the " +
                  colored("CAUTION ZONE", "yellow"))
        elif current_google_trends_datapoint < 95:
            # termcolor has no "orange", so a supported colour is used here
            print("Gtrends datapoint (BTC): " +
                  str(current_google_trends_datapoint) +
                  ", indicating that we are in the " +
                  colored("HIGH CAUTION ZONE", "magenta", attrs=["blink"]))
        else:
            print("Gtrends datapoint (BTC): " +
                  str(current_google_trends_datapoint) +
                  ", indicating that we are in the " +
                  colored("DANGER ZONE", "red", attrs=["blink"]))
    else:
        print("Gtrends datapoint (BTC): " +
              str(current_google_trends_datapoint) +
              ", indicating that we are in the " +
              colored("SAFE ZONE", "green"))
Example #27
def get_trend_result(result):
    result_list = []
    for r in result:
        if len(r)<2:
            continue
        length = len(r)
        if length <= 5:   
            
            pytrends.build_payload(r, cat=0, timeframe='today 1-m')
            interest_over_time_df = pytrends.interest_over_time()
            interest_over_time_df = interest_over_time_df.mean()[:-1]
            interest_over_time_df = interest_over_time_df.map(lambda x: '%d' % round(x))
            interest_over_time_df = interest_over_time_df.to_dict()
            result_list.append(interest_over_time_df)
        else:
            # more than 5 keywords: Google Trends accepts at most 5 per payload,
            # so split into two halves that share one overlapping keyword
            l_r = r[0:length//2][:5]
            r_r = r[length//2-1:][:5]
            pytrends.build_payload(l_r, cat=0, timeframe='today 1-m')
            df_1 = pytrends.interest_over_time()
            df_1 = df_1.mean()[:-1]            
            pytrends.build_payload(r_r, cat=0, timeframe='today 1-m')
            df_2 = pytrends.interest_over_time()
            df_2 = df_2.mean()[:-1]
            
            # rescale both halves by the shared keyword's mean so they are comparable
            if df_1[-1] > 0 and df_2[0] > 0:
                r1 = df_1/df_1[-1]
                r2 = df_2/df_2[0]
            else:
                r1=df_1
                r2=df_2
            interest_over_time_df = pd.concat([r1, r2])
            interest_over_time_df = interest_over_time_df.map(lambda x: '%d' % round(x))
            interest_over_time_df = interest_over_time_df.to_dict()
            result_list.append(interest_over_time_df)

    return result_list
Example #28
if os.name == 'posix':
    sl = '/'
elif os.name == 'nt':
    sl = '\\'


# timezone 360 = US CST
pytrends = TrendReq(hl='en-US', tz=360)
kw_list = ["Bitcoin"]
# pytrends.build_payload(kw_list, cat=0, timeframe='today 5-y', geo='', gprop='')
# Specific time is UTC
# pytrends.build_payload(kw_list, cat=0, timeframe='2017-02-06T10 2017-02-12T07', geo='', gprop='')
# date format YYYY-MM-DD
pytrends.build_payload(kw_list,
                       cat=0,
                       timeframe='2009-01-01 2017-12-31',
                       geo='',
                       gprop='')
dt_pd_google = pytrends.interest_over_time()

dt_pd_google.rename(columns={'Bitcoin': 'google_tr'}, inplace=True)
dt_pd_google['google_tr_fd'] = dt_pd_google['google_tr'].diff(periods=1)
dt_pd_google['google_tr_MAVG30'] = round(
    dt_pd_google['google_tr'].rolling(window=30).mean(), 0)

dt_pd_google.to_pickle('dt_pd_google_monthly.pickle')

print('Google Trend Download Done')

if __name__ == '__main__':
    main()
Example #29
print(dates)
keywords = [
    "zoom", "teams", "skype", "hangouts", "teletrabajo", "videollamada",
    "videoconferencia", "whatsapp", "telegram", "viber", "tiktok",
    "refugiados", "inmigracion", "nacionalismo", "corrupcion", "juicio",
    "guerra comercial", "coronavirus", "pandemia", "infeccion", "medico",
    "disney", "amazon", "netflix", "hbo", "rakuten", "steam", "cabify", "taxi",
    "glovo", "just eat", "deliveroo", "uber eats", "comida a domicilio",
    "hacer deporte", "yoga", "meditacion", "cursos online"
]

pytrends = TrendReq(hl='ES', tz=0)
future_dataframe = {}
c = 1
for k in keywords:

    try:
        print("Requesting ", [k])
        pytrends.build_payload([k], timeframe=dates, geo='ES', gprop='')
        future_dataframe[c] = pytrends.interest_over_time()
        future_dataframe[c].drop(['isPartial'], axis=1, inplace=True)
        c += 1
        result = pd.concat(future_dataframe, axis=1)
    except Exception:
        print("***", "\n", "Error with ", k,
              "or not enough trend data", "\n", "***")

result.columns = result.columns.droplevel(0)
df1 = result.unstack(level=-1)
df2 = pd.DataFrame(df1)
df2.to_csv(os.getenv("PROJECT_TMP"))
Example #30
    12: '2014-10-25 2015-07-12',
    13: '2015-03-04 2015-11-19',
    14: '2015-07-12 2016-03-28',
    15: '2015-11-19 2016-08-05',
    16: '2016-03-28 2016-12-13',
    17: '2016-08-05 2017-04-22',
    18: '2016-12-13 2017-08-30',
    19: '2017-04-22 2018-01-07',
}

z = 1
dt_pd_google_segments = pd.DataFrame(columns=['Bitcoin', 'segment'])
for x in single_frames:
    pytrends.build_payload(kw_list,
                           cat=0,
                           timeframe=single_frames[x],
                           geo='',
                           gprop='')
    dt_pd_google_tmp = pytrends.interest_over_time()
    if x > 0:
        print('x>0')
        dt_pd_google_tmp['segment'] = x
        lda = dt_pd_google_tmp['Bitcoin'] - dt_pd_google_segments['Bitcoin']
        dt_pd_google_tmp['Bitcoin'] = dt_pd_google_tmp['Bitcoin'] - lda.mean(
            skipna=True)
        dt_pd_google_tmp.subtract(lda.mean(skipna=True),
                                  fill_value=0)['Bitcoin']
        dt_pd_google_segments = dt_pd_google_segments.append(dt_pd_google_tmp)
    else:
        print('x = 0')
        dt_pd_google_tmp['segment'] = x
Example #31
def main():
    print("First Module's Name: {}".format(__name__))
    print('OS:', os.name)
    os.chdir('..')

    if os.name == 'posix':
        sl = '/'
    elif os.name == 'nt':
        sl = '\\'

    # timezone 360 = US CST
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = ["Bitcoin"]

    single_frames = {
        0: '2010-07-16 2011-04-03',
        1: '2010-11-25 2011-08-12',
        2: '2011-04-04 2011-12-20',
        3: '2011-08-12 2012-04-28',
        4: '2011-12-20 2012-09-05',
        5: '2012-04-28 2013-01-13',
        6: '2012-09-05 2013-05-23',
        7: '2013-01-13 2013-09-30',
        8: '2013-05-23 2014-02-07',
        9: '2013-09-30 2014-06-17',
        10: '2014-02-07 2014-10-25',
        11: '2014-06-17 2015-03-04',
        12: '2014-10-25 2015-07-12',
        13: '2015-03-04 2015-11-19',
        14: '2015-07-12 2016-03-28',
        15: '2015-11-19 2016-08-05',
        16: '2016-03-28 2016-12-13',
        17: '2016-08-05 2017-04-22',
        18: '2016-12-13 2017-08-30',
        19: '2017-04-22 2017-11-30',
    }

    z = 1
    # documentation:
    # dt_pd_google_segments: final DataFrame containing the stitched trend data
    # dt_pd_google_tmp: temporary frame containing one single time window, appended to dt_pd_google_segments
    # lda: difference between the new frame and the segments collected so far; its mean is used to align overlapping windows
    dt_pd_google_segments = pd.DataFrame(
        columns=['Bitcoin', 'segment', 'google_tr_rtn'])
    for x in single_frames:
        pytrends.build_payload(kw_list,
                               cat=0,
                               timeframe=single_frames[x],
                               geo='',
                               gprop='')
        #dt_pd_google_tmp.iloc[0:0]
        #dt_pd_google_tmp = pytrends.interest_over_time()
        dt_pd_google_tmp = pd.read_csv(
            os.path.abspath(os.curdir) + sl + "D_Data" + sl +
            "G_Google_Trends" + sl + single_frames[x] + ".csv")
        dt_pd_google_tmp.drop(dt_pd_google_tmp.index[[0]], inplace=True)
        dt_pd_google_tmp.columns = ['Bitcoin']
        dt_pd_google_tmp = dt_pd_google_tmp.set_index(
            pd.to_datetime(dt_pd_google_tmp.index,
                           errors='raise',
                           format='%Y-%m-%d',
                           exact=True))

        #mask = dt_pd_google_tmp.Bitcoin == 0
        #column_name = 'Bitcoin'
        #dt_pd_google_tmp.loc[mask, column_name] = 1
        # dt_pd_google_tmp['google_tr_rtn'] = dt_pd_google_tmp['Bitcoin'] / dt_pd_google_tmp['Bitcoin'].shift(1) - 1
        if x > 0:
            print('x>0')
            dt_pd_google_tmp['segment'] = x
            # lda = dt_pd_google_tmp['google_tr_rtn'] - dt_pd_google_segments['google_tr_rtn']
            # dt_pd_google_tmp['Bitcoin'] = dt_pd_google_tmp['Bitcoin'] - lda.mean(skipna=True)
            # dt_pd_google_tmp['Bitcoin'] = dt_pd_google_tmp['Bitcoin'] - 10 * lda.mean(skipna=True)
            # dt_pd_google_tmp['Bitcoin'] = dt_pd_google_tmp.subtract(lda.mean(skipna=True), fill_value=0)['Bitcoin']
            dt_pd_google_segments = dt_pd_google_segments.append(
                dt_pd_google_tmp)
        else:
            print('x = 0')
            dt_pd_google_tmp['segment'] = x
            dt_pd_google_segments = dt_pd_google_segments.append(
                dt_pd_google_tmp)
        print('retrieve frame', x, 'done - ', z / len(single_frames) * 100,
              '%')
        z = z + 1

    # drop overlapping points
    # dt_pd_google_segments.sort_index(ascending=True, inplace=True)
    dt_pd_google_segments['dd'] = dt_pd_google_segments.index
    dt_pd_google_segments.drop_duplicates(subset='dd',
                                          keep='first',
                                          inplace=True)
    dt_pd_google_segments.drop(['dd'], axis=1, inplace=True)
    # dt_pd_google_segments = dt_pd_google_segments[dt_pd_google_segments.index.duplicated(keep='first')]
    # dt_pd_google_segments.index.drop_duplicates(keep='last')

    # rename column
    dt_pd_google_segments.rename(columns={'Bitcoin': 'google_tr'},
                                 inplace=True)

    # 'normalize' index to 100
    # dt_pd_google_segments['google_tr'] = dt_pd_google_segments / \
    #                                      dt_pd_google_segments.loc[dt_pd_google_segments['google_tr'].idxmax()][
    #                                          'google_tr'] * 100

    # store to pickle
    dt_pd_google_segments.to_pickle('dt_pd_google_segments_rconx.pickle')

    print(os.path.basename(__file__), 'executed')