Example #1
0
class Trends:
    """Thin convenience wrapper around a pytrends ``TrendReq`` session."""

    def __init__(self, keywords):
        """Open a Ukrainian-locale session and build the payload for *keywords*."""
        self.pytrends = TrendReq(hl='uk', tz=360)
        self.pytrends.build_payload(keywords)

    def interest_over_time(self):
        """Return the interest-over-time frame for the configured keywords."""
        return self.pytrends.interest_over_time()

    def interest_by_region(self, resolution='COUNTRY', inc_low_vol=False):
        """Return interest aggregated by region at the given resolution."""
        return self.pytrends.interest_by_region(
            resolution=resolution, inc_low_vol=inc_low_vol)

    def related_topics(self):
        """Return topics related to the configured keywords."""
        return self.pytrends.related_topics()

    def related_querries(self):
        """Return related search queries (method name kept for compatibility)."""
        return self.pytrends.related_queries()

    def trending_searches(self, pn='ukraine'):
        """Return currently trending searches for country *pn*."""
        return self.pytrends.trending_searches(pn=pn)

    def top_charts(self, date, hl='uk', tz=360, geo='GLOBAL'):
        """Return the top charts for *date*."""
        return self.pytrends.top_charts(date, hl=hl, tz=tz, geo=geo)

    def suggestions(self, keyword):
        """Return keyword suggestions for *keyword*."""
        return self.pytrends.suggestions(keyword)
def showcompare(name, list):
    """Plot two word clouds: the supplied keyword list and Google's related topics.

    name: base name for the generated plots.
    list: iterable of keywords; only the first entry is sent to Google Trends.
          (Parameter name shadows the builtin ``list`` — kept for interface
          compatibility with existing callers.)
    """
    pytrends = TrendReq(hl='en-US')
    # Normalize the first keyword to plain ASCII. The .decode() is required on
    # Python 3: .encode() yields bytes, and pytrends expects str keywords.
    word = unicodedata.normalize('NFKD', next(iter(list))).encode(
        'ascii', 'ignore').decode('ascii')
    pytrends.build_payload(kw_list=[word], timeframe='today 1-m', geo='US', gprop='news')
    related_topics_df = pytrends.related_topics()[word]
    plot_wordcloud(name + "Google", related_topics_df['title'].tolist())
    plot_wordcloud(name, list)
Example #3
0
def get_data(file_content):
    """Pull interest-over-time and rising related topics for each search term.

    Accumulates results column-wise into the module-level ``final_dict`` and
    ``related_topic_df`` frames, then writes both to CSV. Relies on module
    globals ``start_date``, ``end_date`` and ``today``.
    """
    global final_dict
    global related_topic_df

    # NOTE(review): 'india' is not a standard host-language code (e.g. 'en-IN');
    # confirm pytrends accepts it.
    pytrends = TrendReq(hl='india', tz=360, retries=5, backoff_factor=0.5)

    for search in file_content:
        try:
            pytrends.build_payload(kw_list=[search],
                                   timeframe=f"{start_date} {end_date}",
                                   geo='IN')
            df = pytrends.interest_over_time().reset_index()
            df.drop(labels=['isPartial'], axis=1, inplace=True)
            final_dict = pd.concat([final_dict, df], axis=1)
            related_topic = pytrends.related_topics(
            )[search]['rising'].reset_index()
            related_topic_df = pd.concat([related_topic, related_topic_df],
                                         ignore_index=True,
                                         axis=1)
        except Exception as error:
            # Best-effort batch: log the failing term and continue.
            print(error)

    # (The original's `if related_topic_df.empty: pass` was a no-op and has
    # been removed.)
    final_dict.to_csv(f'..//..//output/final_df_{today}.csv', index=False)
    related_topic_df.to_csv(f'..//..//output/related_topic_df_{today}.csv',
                            index=False)
Example #4
0
async def google_trends(request):
    """Async HTTP handler: Google-Trends data for ``?topic=a,b&lang=..&reg=..``.

    Responds with a JSON envelope {'response': bool, 'message': str, ...}.
    NOTE: ``json`` here is the framework's JSON-response helper (imported at
    file level), not the stdlib module.
    """
    if request.method != 'GET':
        return json({'response': False, 'message': 'Wrong HTTP method', 'results': []})

    raw_topic = request.args.get('topic')
    lang = request.args.get('lang')
    region = request.args.get('reg')

    # Validate BEFORE splitting: args.get() returns None for a missing
    # parameter, and the original crashed on None.split(',') and then
    # compared the resulting *list* against '' with `is` (always False).
    if not raw_topic:
        return json({'response': False, 'message': 'Not enough arguments', 'results': []})
    topic = raw_topic.split(',')

    from pytrends.request import TrendReq

    pytrends = TrendReq(hl='{}-{}'.format(lang, region), tz=360)
    pytrends.build_payload(topic)

    res = pytrends.interest_over_time()
    res.drop('isPartial', axis=1, inplace=True)

    rel_topics = pytrends.related_topics()[topic[0]]
    rel_topics.drop('mid', axis=1, inplace=True)

    rel_queries = pytrends.related_queries()[topic[0]]

    countries = pytrends.interest_by_region(resolution='COUNTRY')
    # Keep only countries with at least one non-zero value.
    countries = countries[(countries.T != 0).any()]

    from json import loads

    # Per-country figures for the first topic only.
    countries = loads(countries.to_json())[topic[0]]

    return json({'message': 'done', 'response': True, 'result': {'interest': res.to_json(), 'related_topics': rel_topics.to_json(),
    'top_queries': rel_queries['top'].to_json(), 'rising_queries': rel_queries['rising'].to_json(), 'countries': countries}})
    def parse(self, response):
        """Scrapy parse callback: yield a RelatedTopicsItem for each rising/top
        related topic of the configured search terms.

        Assumes self.search_term is a '|||'-separated string and that
        self.cat / self.start_date / self.end_date / self.geo / self.gprop
        were set on the spider — TODO confirm against the spider definition.
        """
        try:
            pytrends = TrendReq(hl='en-US', tz=360)
            pytrends.build_payload(self.search_term.split('|||'),
                                   cat=self.cat,
                                   timeframe='{} {}'.format(
                                       self.start_date, self.end_date),
                                   geo=self.geo,
                                   gprop=self.gprop)
            df = pytrends.related_topics()

            keys = ['rising', 'top']
            for key in keys:
                # NOTE(review): indexes with the full '|||'-joined string,
                # while related_topics() keys on individual keywords —
                # confirm this works when several terms are configured.
                json_data = json.loads(
                    df[self.search_term][key].to_json(orient="table"))

                for item in json_data['data']:
                    # Emit one item per configured term for this topic.
                    for search_term in self.search_term.split('|||'):
                        yield RelatedTopicsItem(
                            search_term=search_term,
                            search_phrase=item['topic_title'],
                            top=item['formattedValue'] if key == 'top' else '',
                            breakout=item['formattedValue']
                            if key == 'rising' else '')
        except Exception as e:
            # Best-effort: a failed term is logged and must not kill the crawl.
            print(e)
            pass
    def getTrends(self, q, genType):
        """Plot Google-Trends interest and related topics for query *q*.

        genType selects the output path: 'query' writes fixed filenames,
        'suggested' writes per-keyword filenames. Returns a list of
        trends.google.com explore URLs for the top related topics.
        """
        # build query
        pytrend = TrendReq()
        pytrend.build_payload(kw_list=[q])

        # get timeseries data
        df = pytrend.interest_over_time()

        # create filter for last 2 years
        y = datetime.today()
        z = y.replace(year=y.year - 2)
        after = z.strftime('%Y%m%d')

        # filter and plot data
        # NOTE(review): compares the index against a bare numeric literal;
        # confirm this works for the index pytrends returns.
        df = df.query('index > ' + (after))
        plot = df.plot()
        plt.title('Search Interest Over Time')
        plt.ylabel('Relative Interest')
        plt.xlabel('Date')
        filename = q.replace(' ', '_')
        if genType == 'query':
            plt.savefig('static/images/query/google.png', bbox_inches='tight')
        if genType == 'suggested':
            plt.savefig('static/images/suggested/google_' + str(filename) +
                        '.png',
                        bbox_inches='tight')

        # get related topics and graph
        related = pytrend.related_topics()
        for key in related.keys():
            toprelated = related[key]['top'].head(10)
            relatedlist = list(toprelated['topic_title'])
            trendlist = []
            for item in relatedlist:
                # str.replace returns a new string; the original discarded
                # this result, leaving spaces un-encoded in the URL.
                item = item.replace(' ', '%20')
                item = "https://trends.google.com/trends/explore?geo=US&q=" + str(
                    item)
                trendlist.append(item)

            plot = toprelated.plot(x='topic_title',
                                   y='value',
                                   kind='bar',
                                   legend=None)
            plt.ylabel('Relative Similarity')
            plt.xlabel('Related Topics')
            plt.title('People who searched for ' + str(q) +
                      ' also searched for:')
            if genType == 'query':
                plt.savefig('static/images/query/google1.png',
                            bbox_inches='tight')
            if genType == 'suggested':
                plt.savefig('static/images/suggested/google1_' +
                            str(filename) + '.png',
                            bbox_inches='tight')

            # NOTE: returns inside the loop, after the first keyword's
            # topics — only one keyword is queried, so one iteration runs.
            return (trendlist)
def get_suggestions_and_topics(participant_name, hl='en-US', geo="", gprop=""):
    """Fetch related topics and autocomplete suggestions for one name.

    Queries a fixed 2017-06-01..2020-06-01 window and returns the tuple
    (related_topics_dict, suggestions_list).
    """
    session = TrendReq(hl=hl, tz=360, geo=geo)
    session.build_payload([participant_name],
                          cat=0,
                          timeframe='2017-06-01 2020-06-01',
                          geo=geo,
                          gprop=gprop)
    topics = session.related_topics()
    suggested = session.suggestions(participant_name)
    return topics, suggested
    def __get_top_searches(self, search_term: str):
        """Best-effort lookup of Google-Trends related topics for *search_term*.

        Returns the related-topics mapping, or False when the lookup fails.
        """
        session = TrendReq()
        kw_list = [search_term]
        try:
            session.build_payload(kw_list)
            # web = session.suggestions(kw_list)
            return session.related_topics()
        except Exception:
            # Deliberate best-effort: any pytrends failure degrades to False.
            return False
class Trends:
    '''
    Wrapper class for the pytrends library.
    '''

    def __init__(self, keywords):
        '''
        Initialize the wrapped pytrends session and build its payload.
        '''
        self.pytrends = TrendReq(hl='uk', tz=360)
        self.pytrends.build_payload(keywords)

    def interest_over_time(self):
        '''
        Get interest over time.
        '''
        return self.pytrends.interest_over_time()

    def interest_by_region(self, resolution='COUNTRY', inc_low_vol=False):
        '''
        Get interest by region.
        '''
        return self.pytrends.interest_by_region(resolution=resolution,
                                                inc_low_vol=inc_low_vol)

    def related_topics(self):
        '''
        Get related topics.
        '''
        return self.pytrends.related_topics()

    def related_querries(self):
        '''
        Get related search queries. (Method name kept for compatibility.)
        '''
        return self.pytrends.related_queries()

    def trending_searches(self, pn='ukraine'):
        '''
        Get trending searches for one country.
        '''
        return self.pytrends.trending_searches(pn=pn)

    def top_charts(self, date, hl='uk', tz=360, geo='GLOBAL'):
        '''
        Get top charts for the given date.
        '''
        return self.pytrends.top_charts(date, hl=hl, tz=tz, geo=geo)

    def suggestions(self, keyword):
        '''
        Get suggestions for keyword.
        '''
        return self.pytrends.suggestions(keyword)
def related_topics(keywords):
    """
    Return a dict mapping each keyword to a one-line string of its top
    related topics ('Related Topics: None' when the lookup fails or is empty).
    """
    related_topics_dict = dict()
    for keyword in keywords:
        pytrends = TrendReq(hl='en-US',
                            tz=360,
                            timeout=(10, 25),
                            retries=2,
                            backoff_factor=0.1)
        pytrends.build_payload([keyword])

        # Prevent a pytrends failure from aborting the whole batch.
        try:
            data = pytrends.related_topics()
        except Exception:  # was a bare except; narrowed so Ctrl-C still works
            related_topics_dict[keyword] = 'Related Topics: None'
            continue
        print(keyword)

        # Guard: an empty response dict would previously crash on an
        # unbound key variable.
        if not data:
            related_topics_dict[keyword] = 'Related Topics: None'
            continue

        # First key of the response (replaces the for/break idiom).
        key = next(iter(data))
        df = data[key]['top']

        if df.empty:
            related_string = 'Related Topics: None'
        else:
            parts = ['Related topics:']
            # Top 4 related topics.
            # NOTE(review): iteration starts at index label 1, which skips
            # label 0 on a default RangeIndex — confirm intended.
            for i in range(1, 5):
                try:
                    parts.append(str(i) + '.' + df.at[i, 'topic_title'])
                except KeyError:
                    break
            related_string = ' '.join(parts)
        related_topics_dict[keyword] = related_string

    return related_topics_dict
Example #11
0
def gtrend_getvalue(kw_list,output_file,timeframe):
    """
    Fetch a full set of Google Trends data for kw_list via pytrends and
    export each dataset (queries, topics, region, time series, suggestions)
    through the module-level exportdata()/suggest_to_excel() helpers.
    #pytrends ref https://pypi.org/project/pytrends/#interest-by-region
    """
    try:
        # Sheet names below are derived from the first keyword.
        sp = kw_list[0]
        pytrends = TrendReq(hl='ja-JP', tz=360)
        pytrends.build_payload(kw_list, cat=0, timeframe=timeframe, geo='JP', gprop='')
        # Related queries
        trendsdata = pytrends.related_queries()
        o = output_file
        s = sp + 'query'
        exportdata(trendsdata,o,s,1)
        # Related topics
        trendsdata = pytrends.related_topics()
        s = sp + 'topic'
        exportdata(trendsdata,o,s,1)
        # Interest by region
        trendsdata = pytrends.interest_by_region(resolution='REGION', inc_low_vol=True, inc_geo_code=False)
        s = sp + 'region'
        exportdata(trendsdata,o,s,0)
        # Time series (interest over time)
        trendsdata = pytrends.interest_over_time()
        s = sp + 'overtime'
        exportdata(trendsdata,o,s,0)
        # Suggestions
        trendsdata = pytrends.suggestions(sp)
        s = sp + 'suggestions'
        suggest_to_excel(trendsdata,o,s)

        # Trending searches (disabled)
        #trendsword = pytrends.trending_searches(pn='united_states') # United States
        #trendsword = pytrends.trending_searches(pn='japan') # Japan
        #s = "trendword"
        #f = exportdata(trendsword,o,s,0)

    except Exception as e:
        # Top-level batch step: print the full traceback and swallow the error.
        t, v, tb = sys.exc_info()
        print(traceback.format_exception(t,v,tb))
        print(traceback.format_tb(e.__traceback__))
Example #12
0
def get_related_topics(sessiontrend=None,
                       term='',
                       geo='GB',
                       timeframe='today 5-y'):
    """Return the rising related topics for *term* as a JSON array string.

    sessiontrend: an already-built pytrends session to reuse; when None a
    fresh TrendReq session is created and the payload built for *term*.
    Returns '[]' when Google reports no rising topics for the term.
    """
    kw_list = [term]

    if sessiontrend is None:
        pytrends = TrendReq(hl='en-US', tz=0)
        pytrends.build_payload(kw_list,
                               cat=0,
                               timeframe=timeframe,
                               geo=geo,
                               gprop='')
    else:
        pytrends = sessiontrend

    # Guard: pytrends returns None for 'rising' (or omits the term entirely)
    # when there is no data — the original crashed on .values in that case.
    entry = pytrends.related_topics().get(term) or {}
    rising = entry.get('rising')
    if rising is None:
        return json.dumps([])
    return json.dumps(rising.values.tolist())
Example #13
0
class Trends:
    """Keyword-expansion helper backed by Google Trends related topics."""

    def __init__(self):
        self.pytrends = TrendReq()

    def get_results(self, keyword):
        """Return up to three lowercase related topic titles for *keyword*.

        The keyword itself is excluded. Returns [] on any pytrends failure
        (best-effort behaviour kept from the original).
        """
        trends = []
        try:
            suggs = self.pytrends.suggestions(keyword)
            # Use the first suggestion's topic id as the search key.
            kw_list = [suggs[0]['mid']]

            self.pytrends.build_payload(kw_list,
                                        cat=0,
                                        timeframe='today 5-y',
                                        geo='',
                                        gprop='')
            df = self.pytrends.related_topics()

            for title in df[kw_list[0]]['top']['topic_title'][:3]:
                if title.lower() != keyword.lower():
                    trends.append(title.lower())
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt propagates.
            pass
        return trends
Example #14
0
class Crawler:
    def __init__(self, url, data):
        """Fetch and parse *url* once; the check_* methods read the shared soup.

        data: per-check configuration mapping (crawl() reads a 'priority'
        value per check name) — presumably loaded from the app's settings;
        verify against the caller.
        """
        self.url = url
        # NOTE(review): no timeout on the request — this can hang indefinitely.
        self.source_code = requests.get(self.url)
        self.plain_text = self.source_code.text
        self.soup = BeautifulSoup(self.plain_text, "html.parser")
        self.data = data
        self.modal_data = {}      # per-check JSON payloads for UI modals
        self.modal_images = {}    # replaced by a list in check_images_alt()
        self.modal_keywords = {}
        self.pytrends = TrendReq(hl='en-US')

    def crawl(self, maxPages):
        """Run every SEO check against the page and aggregate the results.

        Returns a dict with the PageSpeed / mobile-ready payloads, per-check
        results, weighted scores, and priority groupings for the UI.
        Note: the function returns at the end of the first loop iteration,
        so maxPages effectively has no influence on the result.
        """
        page = 1
        seo_result = {}
        seo_result_2 = {}
        while page <= maxPages:
            page += 1

            page_speed_url = "https://www.googleapis.com/pagespeedonline/v2" \
                             "/runPagespeed?url=" + self.url + \
                             "&filter_third_party_resources=true&locale=en_US" \
                             "&screenshot=true&strategy=mobile&key" \
                             "=AIzaSyALJmgrWTmS3hvq3lxWzWnvo9FimxY-VAY"

            mobile_ready_url = 'https://www.googleapis.com/pagespeedonline' \
                               '/v3beta1' \
                               '/mobileReady?url=' + self.url + '&key=AIzaSyALJmgrWTmS3hvq3lxWzWnvo9FimxY-VAY'

            page_speed = requests.get(page_speed_url).json()

            # Convert the screenshot payload from base64url to standard
            # base64 ('_' -> '/', '-' -> '+') so it can be rendered directly.
            page_speed['screenshot']['data'] = page_speed['screenshot'][
                'data'].replace('_', '/')
            page_speed['screenshot']['data'] = page_speed['screenshot'][
                'data'].replace('-', '+')

            seo_result['page_speed'] = page_speed

            mobile_ready = requests.get(mobile_ready_url).json()

            mobile_ready['screenshot']['data'] = mobile_ready['screenshot'][
                'data'].replace('_', '/')
            mobile_ready['screenshot']['data'] = mobile_ready['screenshot'][
                'data'].replace('-', '+')

            seo_result['mobile_ready'] = mobile_ready

            # Each check_* call returns the standard check dict
            # (passed/content/length/name/msg/category/priority).
            seo_result_2['Nested Tables'] = self.check_nested_tables()

            seo_result_2['Title'] = self.check_title()
            seo_result_2['Description'] = self.check_description()

            seo_result_2['<h1> Headings'] = self.check_headings_one()
            seo_result_2['<h2> Headings'] = self.check_headings_two()

            seo_result_2['Image Alt Tag'] = self.check_images_alt()

            Tag.do_not_call_in_templates = True

            seo_result_2['Broken Links'] = self.check_broken_links()

            seo_result_2['Bad Urls'] = self.check_urls()
            seo_result_2['Inline CSS'] = self.check_styles()
            seo_result_2['Google Analytics'] = self.check_google_analytics()

            seo_result_2['Favicon'] = self.check_favicon()
            seo_result_2['HTTPS'] = self.check_https()

            seo_result_2['Frameset'] = self.check_frames()
            seo_result_2['Canonical Tag'] = self.check_canonical()
            seo_result_2['No Index'] = self.check_noindex()

            seo_result_2['Robots'] = self.check_robots_txt()
            seo_result_2['robots_url'] = self.get_robots()
            seo_result_2['Sitemaps'] = self.check_site_map()
            seo_result_2['Doctype'] = self.check_doctype()

            seo_result_2['No Follow'] = self.check_nofollow()
            seo_result_2['Flash'] = self.check_flash()

            seo_result_2['Keywords'] = self.keyword_results_data(
                self.keyword_results())
            seo_result_2['Mobile Ready'] = self.check_mobile_ready(
                mobile_ready)

            # Tally pass/fail/warning across all checks.
            scores = {'passed': 0, 'failed': 0, 'warning': 0}
            result = {}

            for key, value in seo_result_2.items():
                try:
                    if value['passed'] is True:
                        scores['passed'] += 1
                        result[key] = {'passed': True}
                    elif value['passed'] is False:
                        scores['failed'] += 1
                        result[key] = {'passed': False}
                    elif value['passed'] is None:
                        scores['warning'] += 1
                        result[key] = {'passed': None}
                except:
                    # Entries without a 'passed' key are skipped silently.
                    pass

            api_list = {'Speed': {}, 'Usability': {}}

            # Fold the PageSpeed rule results into the same scoreboard.
            for key, value in page_speed['formattedResults'][
                    'ruleResults'].items():
                if key == 'MainResourceServerResponseTime':
                    continue
                try:
                    if value['urlBlocks']:
                        scores['failed'] += 1
                        result[value['localizedRuleName']] = {'passed': False}
                    else:
                        scores['passed'] += 1
                        result[value['localizedRuleName']] = {'passed': True}
                except:
                    # Missing 'urlBlocks' means the rule passed.
                    scores['passed'] += 1
                    result[value['localizedRuleName']] = {'passed': True}
                if 'SPEED' in value['groups']:
                    api_list['Speed'][key] = value
                elif 'USABILITY' in value['groups']:
                    api_list['Usability'][key] = value
                result[value['localizedRuleName']]['priority'] = 'medium'

            seo_result['list'] = seo_result_2

            #self.format_keyword_data(self.keyword_results())

            seo_result['messages'] = json.dumps(self.modal_data)
            seo_result['images'] = json.dumps(self.modal_images)

            #seo_result['keywords'] = json.dumps(self.modal_keywords)

            seo_result['url'] = self.url
            seo_result['scores'] = json.dumps(scores)
            seo_result['result_set'] = json.dumps(result)

            # Overwrite default priorities with the configured ones.
            priority_list = {'high': [], 'medium': [], 'low': []}
            for key, value in self.data.items():
                result[key]['priority'] = value['priority']

            # Build anchor links for every failed check, grouped by priority.
            for key, value in result.items():
                if value['passed'] is False:
                    priority_list[result[key]['priority']].append(
                        '<a href="#" onclick="navigate(\'' + key +
                        '\', 100); return false;" class="orangelink">' +
                        html.escape(key) + '</a>')

            priority_list = {
                key: value if len(value) > 0 else
                ['All checks have been passed for this priority']
                for key, value in priority_list.items()
            }

            seo_result['result_set'] = json.dumps(result)
            seo_result['result_s'] = result
            priority_list = {
                'high': ', '.join(priority_list['high']),
                'medium': ', '.join(priority_list['medium']),
                'low': ', '.join(priority_list['low'])
            }
            seo_result['priority_list'] = priority_list

            # Re-group the checks by category for the UI navigation.
            omen = {'Common': {}, 'Advanced Seo': {}, 'Server': {}}
            for k, v in seo_result['list'].items():
                if type(v).__name__ == 'dict':
                    if v.get('category') == 'common':
                        omen['Common'][k] = v
                    elif v.get('category') == 'server':
                        omen['Server'][k] = v
                    elif v.get('category') == 'advanced_seo':
                        omen['Advanced Seo'][k] = v

            seo_result['omen'] = omen
            seo_result['api_list'] = api_list
            seo_result['nav_list'] = dict(omen, **api_list).keys()

            seo_result['total'] = json.dumps(self.calculate_result(result))

            return seo_result

    def calculate_result(self, data):
        """Aggregate weighted pass/fail scores from per-check result dicts.

        Each dict value contributes its priority weight (high=3, medium=2,
        low=1) to 'passed' or 'failed'; non-dict values are ignored.
        """
        scores = {'passed': 0, 'failed': 0}
        rates = {'high': 3, 'medium': 2, 'low': 1}
        for value in data.values():
            # isinstance replaces the fragile type(...).__name__ comparison.
            if isinstance(value, dict):
                if value.get('passed') is True:
                    scores['passed'] += rates[value['priority']]
                elif value.get('passed') is False:
                    scores['failed'] += rates[value['priority']]
        return scores

    def check_mobile_ready(self, mobile):
        """Summarize the Mobile Ready verdict from a PageSpeed mobileReady payload."""
        verdict = mobile['ruleGroups']['USABILITY']['pass']
        return {
            'passed': verdict,
            'content': verdict,
            'length': 0,
            'name': 'Mobile Ready',
            'msg': self.format_message('Mobile Ready', verdict, []),
            'category': self.get_from_queryset('Mobile Ready', 'category'),
            'priority': self.get_from_queryset('Mobile Ready', 'priority')
        }

    def check_title(self):
        """Check that the page has a <title> between 1 and 70 characters.

        Returns the standard check dict (passed/content/length/name/msg/...).
        """
        tag = self.soup.find('title')
        # Guard: Tag.string is None when the element has no single string
        # child — the original crashed on len(None) in that case.
        title = '' if tag is None or tag.string is None else tag.string
        ok = 0 < len(title) <= 70
        return {
            'passed': ok,
            'content': title,
            'length': len(title),
            'name': 'title',
            'msg': self.format_message('Title', ok, [len(title)]),
            'msg_data': '<p>' + title + '</p>' if len(title) > 0 else '',
            'category': self.get_from_queryset('Title', 'category'),
            'priority': self.get_from_queryset('Title', 'priority')
        }

    def check_description(self):
        """Check that the page has a meta description of 1-160 characters."""
        tag = self.soup.find('meta', {'name': "description"})
        # .get('content') is None when the meta tag lacks the attribute;
        # fall back to '' so len() below cannot fail.
        description = '' if tag is None else (tag.get('content') or '')
        ok = 0 < len(description) <= 160
        return {
            'passed': ok,
            'content': description,
            'length': len(description),
            'name': 'description',
            'msg': self.format_message('Description', ok, [len(description)]),
            'msg_data': '<p>' + description + '</p>' if len(description) > 0 else '',
            'category': self.get_from_queryset('Description', 'category'),
            'priority': self.get_from_queryset('Description', 'priority')
        }

    def check_headings_one(self):
        """Check that the page has exactly one <h1> heading."""
        # findAll returns a (possibly empty) result list, never None,
        # so the original's None-guard was dead code.
        headings_one = self.soup.findAll('h1')
        ok = len(headings_one) == 1
        return {
            'passed': ok,
            'content': headings_one,
            'length': len(headings_one),
            'name': 'headings_one',
            'msg': self.format_message('<h1> Headings', ok,
                                       [len(headings_one)]),
            'category': self.get_from_queryset('<h1> Headings', 'category'),
            'priority': self.get_from_queryset('<h1> Headings', 'priority')
        }

    def check_headings_two(self):
        """Check that the page has at least one <h2> heading."""
        # findAll returns a (possibly empty) result list, never None,
        # so the original's None-guard was dead code.
        headings_two = self.soup.findAll('h2')
        ok = len(headings_two) > 0
        return {
            'passed': ok,
            'content': headings_two,
            'length': len(headings_two),
            'name': 'headings_two',
            'msg': self.format_message('<h2> Headings', ok,
                                       [len(headings_two)]),
            'category': self.get_from_queryset('<h2> Headings', 'category'),
            'priority': self.get_from_queryset('<h2> Headings', 'priority')
        }

    def check_images_alt(self):
        """Flag <img> tags that are missing an alt attribute (or have an empty one).

        Also records the offending tags in self.modal_data / self.modal_images
        for the UI. Returns the standard check dict.
        """
        images = self.soup.findAll('img')
        no_alt = []
        if images is None:
            # Defensive branch kept from the original; findAll normally
            # returns an empty list rather than None.
            return {
                'passed': True,
                'content': None,
                'length': 0,
                'name': 'no_alt',
                'msg': self.format_message('Image Alt Tag', True, [0, 0]),
                'category': self.get_from_queryset('Image Alt Tag', 'category'),
                'priority': self.get_from_queryset('Image Alt Tag', 'priority')
            }
        for image in images:
            # A missing alt attribute (None) and an empty one ("") are
            # equally "missing" — the original handled them in two
            # identical branches.
            if not image.get('alt'):
                no_alt.append(str(image))
        self.modal_data['Image Alt Tag'] = json.dumps(
            [html.escape(el) for el in no_alt])
        self.modal_images = no_alt
        ok = len(no_alt) == 0
        return {
            'passed': ok,
            'content': no_alt,
            'length': len(no_alt),
            'name': 'no_alt',
            'msg': self.format_message('Image Alt Tag', ok,
                                       [len(images), len(no_alt)]),
            'category': self.get_from_queryset('Image Alt Tag', 'category'),
            'priority': self.get_from_queryset('Image Alt Tag', 'priority'),
            'json': json.dumps(no_alt),
            'html': json.dumps([html.escape(el) for el in no_alt])
        }

    def check_url(self, tag):
        """Absolutize *tag*'s src against self.url, collapsing '///' runs.

        Returns the (mutated) tag.
        """
        url = tag.get('src')
        if not url.startswith(self.url):
            tag['src'] = self.url + url if (
                url.startswith('/') and not self.url.endswith('/')) or (
                    not url.startswith('/')
                    and self.url.endswith('/')) else self.url + '/' + url
            # str.replace returns a new string; it must be stored back —
            # the original discarded the result, leaving '///' in place.
            tag['src'] = tag['src'].replace('///', '/')
        return tag

    def check_links(self):
        """Return the page's anchors (as strings), skipping empty and '#' hrefs."""
        anchors = self.soup.findAll('a', href=True)
        return [str(a) for a in anchors if a.get('href') not in ('', '#')]

    def check_broken_links(self, update=False):
        """Probe every anchor on the page; collect links that error out or
        answer with HTTP status >= 400.

        Skips fragment/javascript/mailto-style hrefs, absolutizes relative
        ones, and records the broken set in self.modal_data for the UI.
        """
        links = self.soup.findAll('a')
        broken_links = []
        all_links = []
        headers = {
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
        }
        if (links is None):
            # Defensive: findAll normally returns an empty list, not None.
            return {
                'passed': True,
                'content': None,
                'length': 0,
                'name': 'broken_links',
                'msg': self.format_message('Broken Links', True,
                                           [len(broken_links)]),
                'category': self.get_from_queryset('Broken Links', 'category'),
                'priority': self.get_from_queryset('Broken Links', 'priority')
            }
        for link in links:
            href = link.get('href')
            if (href is None or link.get('onclick') is not None):
                continue
            elif ('#' == href or 'javascript' in href or '@' in href):
                continue
            elif ('https://' not in href and 'http://' not in href):
                href = self.url + href
            try:
                all_links.append(href)
                request = requests.head(href, headers=headers)
                if request.status_code >= 400:
                    # HEAD is not always supported; retry with GET.
                    request = requests.get(href, headers=headers)
            except Exception:
                broken_links.append(str(link))
                # Fix: skip the status check below — the original fell
                # through and read `request`, which is unbound on the first
                # iteration (NameError) or stale from a previous one
                # (possible double-append of the same link).
                continue
            if request.status_code >= 400:
                broken_links.append(str(link))
        self.modal_data['Broken Links'] = json.dumps(
            [html.escape(el) for el in broken_links])
        ok = len(broken_links) == 0
        return {
            'passed': ok,
            'content': broken_links,
            'length': len(broken_links),
            'name': 'broken_links',
            'msg': self.format_message('Broken Links', ok,
                                       [len(all_links), len(broken_links)]),
            'html': json.dumps(broken_links),
            'category': self.get_from_queryset('Broken Links', 'category'),
            'priority': self.get_from_queryset('Broken Links', 'priority')
        }

    def url_exists(self, value):
        """Best-effort reachability check: True when a HEAD request succeeds."""
        try:
            requests.head(value, allow_redirects=True)
        except Exception:
            # Any failure (connection error, bad schema, ...) means "absent".
            # The original's three except arms all returned False too, but the
            # MissingSchema clause referenced a name that may be undefined at
            # exception-handling time, which would itself raise NameError.
            return False
        return True

    def check_script_async(self):
        """Flag <script> tags that are async with no src, or have an empty src."""
        scripts = self.soup.findAll('script')
        result = []
        if scripts is None:
            return {
                'passed': True,
                'content': None,
                'length': 0,
                'name': 'script_async',
                'msg': self.format_message('Script Async', True, [len(result)])
            }
        for script in scripts:
            # Fix: the original compared with `is ''` (identity against a
            # literal — a bug/SyntaxWarning); `== ''` is the correct test.
            # NOTE(review): precedence kept from the original —
            # (async and src is None) or (src == ''); confirm the intent
            # wasn't async and (src is None or src == '').
            if script.get('async') and script.get(
                    'src') is None or script.get('src') == '':
                result.append(script)
        ok = len(result) == 0
        return {
            'passed': ok,
            'content': result,
            'length': len(result),
            'name': 'script_async',
            'msg': self.format_message('Script Async', ok, [len(result)])
        }

    def check_section(self):
        """Return heading tags (h1–h6) whose contents embed a <section>."""
        found = []
        for level in range(1, 7):
            headings = self.soup.findAll('h' + str(level))
            if headings is not None:
                for heading in headings:
                    if heading.contents and '<section' in str(heading.contents):
                        found.append(heading)
        return found

    def check_section_headings(self):
        """Return <section> elements that contain no h2–h6 subheading.

        BUG FIX: the original hand-written condition tested '<h3' twice;
        generating the tag markers guarantees each level h2..h6 is checked
        exactly once.
        """
        sections = self.soup.findAll('section')
        result = []
        if sections is None:
            return result
        for section in sections:
            if section.contents:
                content = str(section.contents)
                if not any('<h' + str(level) in content
                           for level in range(2, 7)):
                    result.append(section)
        return result

    def check_urls(self):
        """Flag anchors whose URL path contains '_' or '=' (SEO-unfriendly).

        Stores the escaped offenders in self.modal_data['Bad Urls'] and
        returns the standard result dict.
        """
        links = self.soup.findAll('a')
        bad_urls = []
        if (links is None):
            # findAll returns an empty ResultSet rather than None;
            # this early-exit is effectively dead but harmless.
            return {
                'passed': True,
                'content': bad_urls,
                'length': len(bad_urls),
                'name': 'bad_urls',
                'msg': self.format_message('Bad Urls', True, [len(bad_urls)]),
                'category': self.get_from_queryset('Bad Urls', 'category'),
                'priority': self.get_from_queryset('Bad Urls', 'priority')
            }
        else:
            for link in links:
                href = link.get('href')
                # Skip anchors without hrefs or driven by JS click handlers.
                if (href is None or link.get('onclick') is not None):
                    continue
                # Skip fragment-only, javascript: and mailto-style targets.
                elif ('#' == href or 'javascript' in href or '@' in href):
                    continue
                # Resolve scheme-less (relative) URLs against the page URL.
                elif ('https://' not in href and 'http://' not in href):
                    href = self.url + href
                if ('?' in href):
                    # Only the path part counts; '=' is legitimate in a query.
                    if '_' in href.split('?')[0] or '=' in href.split('?')[0]:
                        bad_urls.append(str(link))
                else:
                    if '_' in href or '=' in href:
                        bad_urls.append(str(link))
            self.modal_data['Bad Urls'] = json.dumps(
                [html.escape(el) for el in bad_urls])
            return {
                'passed':
                False if len(bad_urls) > 0 else True,
                'content':
                bad_urls,
                'length':
                len(bad_urls),
                'name':
                'bad_urls',
                'msg':
                self.format_message('Bad Urls',
                                    False if len(bad_urls) > 0 else True,
                                    [len(bad_urls)]),
                'html':
                json.dumps(bad_urls),
                'category':
                self.get_from_queryset('Bad Urls', 'category'),
                'priority':
                self.get_from_queryset('Bad Urls', 'priority')
            }

    def check_styles(self):
        """Collect every element that carries an inline style="..." attribute;
        the check passes only when none are found."""
        offenders = [
            str(element) for element in (self.soup.findAll() or [])
            if element.get('style') is not None
        ]
        self.modal_data['Inline CSS'] = json.dumps(
            [html.escape(el) for el in offenders])
        passed = len(offenders) == 0
        return {
            'passed': passed,
            'content': offenders,
            'length': len(offenders),
            'name': 'inline_styles',
            'msg': self.format_message('Inline CSS', passed, [len(offenders)]),
            'html': json.dumps([html.escape(el) for el in offenders]),
            'json': json.dumps(offenders),
            'category': self.get_from_queryset('Inline CSS', 'category'),
            'priority': self.get_from_queryset('Inline CSS', 'priority')
        }

    def check_google_analytics(self):
        """Detect Google Analytics, either via an external analytics.js
        include or inline tracking code.

        BUG FIX: `script.string` is None for empty or compound <script>
        tags, so the original `'...' in script.string` raised TypeError;
        the membership test is now guarded.
        """
        scripts = [] if self.soup.findAll(
            'script') is None else self.soup.findAll('script')
        result = False
        for script in scripts:
            src = script.get('src')
            if (src is not None):
                if ('//www.google-analytics.com/analytics.js' in src):
                    result = True
                    break
            elif script.string and 'google-analytics.com' in script.string:
                result = True
                break
        return {
            'passed': result,
            'content': None,
            'length': 0,
            'name': 'google_analytics',
            'msg': self.format_message('Google Analytics', result, [0]),
            'category': self.get_from_queryset('Google Analytics', 'category'),
            'priority': self.get_from_queryset('Google Analytics', 'priority')
        }

    def check_favicon(self):
        """Look for a <link> whose rel mentions 'icon'; returns the matching
        tag as content, or False when none exists."""
        result = False
        for link in self.soup.findAll('link') or []:
            rel = link.get('rel')
            if rel is not None and 'icon' in rel:
                result = link
                break
        passed = result is not False
        return {
            'passed': passed,
            'content': result,
            'length': 0,
            'name': 'favicon',
            'msg': self.format_message('Favicon', passed, [result]),
            'category': self.get_from_queryset('Favicon', 'category'),
            'priority': self.get_from_queryset('Favicon', 'priority')
        }

    def check_robots(self, robots_url):
        """Fetch robots.txt from *robots_url* and return its decoded text;
        False when the fetch fails or the body is empty."""
        try:
            from urllib.request import urlopen
            with urlopen(robots_url) as stream:
                body = stream.read().decode("utf-8")
            return body if body else False
        except:
            return False

    def check_robots_txt(self):
        """Report whether the site serves a robots.txt.

        CONSISTENCY FIX: delegates the fetch to check_robots() instead of
        duplicating the urlopen/decode logic inline.
        """
        robots_url = self.get_robots()
        # check_robots returns the file text, or False on failure/empty body.
        result = self.check_robots(robots_url) if robots_url else False
        return {
            'passed': True if result else False,
            'content': result,
            'length': 1 if result else 0,
            'name': 'robots',
            'msg': self.format_message('Robots', True if result else False, []),
            # Only render the link when a robots.txt was actually fetched.
            'msg_data': '<p><a href="' + robots_url + '">' + robots_url +
            '</a></p>' if result else '',
            'category': self.get_from_queryset('Robots', 'category'),
            'priority': self.get_from_queryset('Robots', 'priority')
        }

    def get_robots(self):
        """Derive the robots.txt URL for self.url via reppy's helper."""
        return reppy.Robots.robots_url(self.url)

    def check_sitemap(self):
        """Fetch robots.txt via reppy and return the sitemap URLs it declares."""
        return list(reppy.Robots.fetch(self.url).sitemaps)

    def check_site_map(self):
        """Parse 'Sitemap:' directives out of robots.txt and report them.

        BUG FIX: the original bound the robots.txt body to a local named
        `str`, shadowing the builtin inside this method; renamed.
        """
        sitemaps = []
        robots_text = self.check_robots(self.get_robots())
        if robots_text is not False:
            for line in robots_text.split('\n'):
                if 'Sitemap:' in line:
                    # Split only on the first ':' so the URL's '://' survives.
                    value = line.split(':', 1)[1]
                    if value not in sitemaps:
                        sitemaps.append(value)
            result = '<p>'
            for sitemap in sitemaps:
                result += '<a href="' + sitemap.lower().strip(
                ) + '">' + sitemap.lower().strip() + '</a><br>'
            result += '</p>'
        else:
            result = ''
        return {
            'passed': True if len(sitemaps) > 0 else False,
            'content': sitemaps,
            'length': len(sitemaps),
            'name': 'sitemaps',
            'msg': self.format_message('Sitemaps',
                                       True if len(sitemaps) > 0 else False,
                                       [len(sitemaps)]),
            'msg_data': result if len(sitemaps) > 0 else '',
            'category': self.get_from_queryset('Sitemaps', 'category'),
            'priority': self.get_from_queryset('Sitemaps', 'priority')
        }

    def check_https(self):
        """Probe self.url with a browser-like User-Agent; an SSL error (or
        any other request failure) fails the check."""
        result = True
        try:
            requests.get(self.url, headers={
                'User-Agent':
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
            })
        except:  # SSLError and every other failure are treated the same
            result = False
        return {
            'passed': result,
            'content': None,
            'length': 0,
            'name': 'Https',
            'msg': self.format_message('HTTPS', result, [0]),
            'category': self.get_from_queryset('HTTPS', 'category'),
            'priority': self.get_from_queryset('HTTPS', 'priority')
        }

    def check_deprecated_elements(self, type):
        """Return all elements with the given tag name, or False when the
        lookup yields None.

        Note: the parameter shadows the builtin `type`; the name is kept
        for caller (keyword-argument) compatibility.
        """
        elements = self.soup.findAll(type)
        if elements is None:
            return False
        return list(elements)

    def get_most_common_keyword_count(self):
        """Download self.url, extract its visible text, and return a list of
        (word, count) pairs for valid keywords that appear at least five
        times, sorted by count ascending."""
        page = urllib.request.urlopen(self.url).read()
        words = re.findall(r'\b\w+', self.text_from_html(page))
        frequencies = Counter(word.lower() for word in words)
        occurrences = [(word, count) for word, count in frequencies.items()
                       if count >= 5 and self.is_valid_keyword(word)]
        occurrences.sort(key=lambda pair: pair[1])
        return occurrences

    def tag_visible(self, element):
        """True when *element* is text that would render on the page — i.e.
        not inside style/script/head/title/meta and not an HTML comment."""
        hidden_parents = ('style', 'script', 'head', 'title', 'meta',
                          '[document]')
        if element.parent.name in hidden_parents:
            return False
        return not isinstance(element, Comment)

    def text_from_html(self, body):
        """Extract the page's visible text as one space-joined string."""
        soup = BeautifulSoup(body, 'html.parser')
        fragments = soup.findAll(text=True)
        return u" ".join(
            fragment.strip()
            for fragment in filter(self.tag_visible, fragments))

    def check_flash(self):
        """Fail the check when the page source references Flash content."""
        flash_markers = ('/.swf/S', 'flashplayer',
                         'https://get.adobe.com/flashplayer/')
        result = not any(marker in self.source_code
                         for marker in flash_markers)
        return {
            'passed': result,
            'content': None,
            'length': 0,
            'name': 'flash',
            'msg': self.format_message('Flash', result, [0]),
            'category': self.get_from_queryset('Flash', 'category'),
            'priority': self.get_from_queryset('Flash', 'priority')
        }

    def check_nested_tables(self):
        """Find <table> elements whose contents embed another <table>.

        BUG FIX: `'<table' in table.contents` was a list-membership test —
        it could only match a child that was literally the string '<table',
        so nesting was never detected. The contents are stringified first,
        matching the sibling checks (check_section, check_section_headings).
        """
        tables = self.soup.findAll('table')
        nested_tables = []
        for table in tables:
            if '<table' in str(table.contents):
                nested_tables.append(table)
        passed = len(nested_tables) == 0
        return {
            'passed': passed,
            'content': nested_tables,
            'length': len(nested_tables),
            'name': 'nested_tables',
            'msg': self.format_message('Nested Tables', passed,
                                       [len(nested_tables)]),
            'category': self.get_from_queryset('Nested Tables', 'category'),
            'priority': self.get_from_queryset('Nested Tables', 'priority')
        }

    def check_frames(self):
        """Report use of frameset/frame/iframe elements; passes only when
        none are present."""
        framesets = self.soup.findAll('frameset')
        frames = self.soup.findAll('frame')
        iframes = self.soup.findAll('iframe')
        counts = [len(framesets), len(frames), len(iframes)]
        passed = not any(counts)
        return {
            'passed': passed,
            'content': [framesets, frames],
            'length': counts,
            'name': 'Frameset',
            'msg': self.format_message('Frameset', passed,
                                       [len(framesets), len(frames),
                                        len(iframes)]),
            'category': self.get_from_queryset('Frameset', 'category'),
            'priority': self.get_from_queryset('Frameset', 'priority')
        }

    def check_canonical(self):
        """Check for a <link rel="canonical"> tag on the page."""
        canonical = self.soup.find('link', rel='canonical')
        has_canonical = bool(canonical)
        return {
            'passed': has_canonical,
            'content': canonical,
            'length': 1 if canonical else 0,
            'name': 'canonical',
            'msg': self.format_message(
                'Canonical Tag', has_canonical,
                ['uses' if canonical else 'does not use', self.url]),
            'category': self.get_from_queryset('Canonical Tag', 'category'),
            'priority': self.get_from_queryset('Canonical Tag', 'priority')
        }

    def check_safe_browsing(self):
        """Query the Google Safe Browsing lookup endpoint and return the raw
        HTTP response.

        NOTE(review): both the API key and the checked URL are hard-coded,
        and this targets the legacy v3 lookup endpoint — presumably
        placeholder/experimental code; confirm before relying on it.
        """
        safe_browsing_url = 'https://sb-ssl.google.com/safebrowsing/api/lookup?client=demo-app&key=AIzaSyALJmgrWTmS3hvq3lxWzWnvo9FimxY-VAY&appver=1.5.2&pver=3.1&url=http%3A%2F%2Fianfette.org%2F'
        safe_url = requests.get(safe_browsing_url)  # .json()
        return safe_url

    def check_noindex(self):
        """Collect robots/googlebot meta tags whose content is 'noindex'.

        Informational check ('passed' is None).

        BUG FIX: the 'html' field previously json.dumps'd the bs4 Tag
        objects themselves, which raises TypeError whenever a tag is found;
        it now serialises their string forms, matching check_nofollow.
        """
        meta_tags = self.soup.findAll('meta')
        noindex_tags = []
        if meta_tags and meta_tags is not None:
            for meta_tag in meta_tags:
                name = meta_tag.get('name')
                if name is not None:
                    if 'robots' in name or 'googlebot' in name:
                        if meta_tag.get('content') == 'noindex':
                            noindex_tags.append(meta_tag)
        html_list = [str(tag) for tag in noindex_tags]
        self.modal_data['No Index'] = json.dumps(
            [html.escape(el) for el in html_list])
        return {
            'passed': None,
            'content': noindex_tags,
            'length': len(noindex_tags),
            'name': 'no_index',
            'msg': self.format_message('No Index', None, [
                'uses' if len(noindex_tags) > 0 else 'does not use',
                len(noindex_tags)
            ]),
            'html': json.dumps(html_list),
            'category': self.get_from_queryset('No Index', 'category'),
            'priority': self.get_from_queryset('No Index', 'priority')
        }

    def check_nofollow(self):
        """Report every rel="nofollow" anchor (informational; 'passed' is None)."""
        links = self.soup.findAll('a', rel="nofollow")
        serialized = [str(anchor) for anchor in links]
        self.modal_data['No Follow'] = json.dumps(
            [html.escape(el) for el in serialized])
        return {
            'passed': None,
            'content': links,
            'length': len(links),
            'name': 'no_follow',
            'msg': self.format_message(
                'No Follow', None,
                ['uses' if len(links) > 0 else 'does not use',
                 len(links)]),
            'html': json.dumps(serialized),
            'category': self.get_from_queryset('No Follow', 'category'),
            'priority': self.get_from_queryset('No Follow', 'priority')
        }

    def check_doctype(self):
        """Check that the document declares a doctype."""
        doctypes = [
            node for node in self.soup.contents
            if isinstance(node, bs4.Doctype)
        ]
        has_doctype = bool(doctypes)
        return {
            'passed': has_doctype,
            'content': str(doctypes[0]) if doctypes else None,
            'length': 1 if doctypes else 0,
            'name': 'doctype',
            'msg': self.format_message('Doctype', has_doctype, []),
            'category': self.get_from_queryset('Doctype', 'category'),
            'priority': self.get_from_queryset('Doctype', 'priority')
        }

    def check_media_queries(self):
        """Collect the page's stylesheet URLs and the @media rules they define.

        Returns [css_links, media_rules].

        BUG FIXES: the original re-assigned `css_links = []` immediately
        after fetching the real link list, so the download loop never ran
        and the method always returned [[], []]; it also overwrote
        `media_only` on each iteration instead of accumulating, and had an
        unreachable bare `except` after `except Exception`.
        """
        css_links = []
        try:
            response = requests.get(self.url)
            soup = BeautifulSoup(response.content, 'lxml')
            css_links = [
                link["href"] for link in soup.findAll("link")
                if "stylesheet" in link.get("rel", [])
            ]
        except Exception:
            # Page fetch/parse failed: report no stylesheets.
            pass

        pattern = re.compile(r'@media.+?\}')
        media_only = []
        for url in css_links:
            try:
                stylesheet = requests.get(url).text
                media_only.extend(pattern.findall(stylesheet))
            except Exception:
                # Unreachable stylesheet: skip it.
                continue
        return [css_links, media_only]

    def check_media(self):
        """Return stylesheet <link> tags that use @media rules (in the file
        body or via a media attribute)."""
        responsive_links = []
        for link in self.soup.findAll('link', rel="stylesheet"):
            try:
                stylesheet = requests.get(link.get('href')).text
            except:
                continue
            if '@media ' in stylesheet or link.get('media') is not None:
                responsive_links.append(link)
        return responsive_links

    def safe_browsing(self):
        """Query the legacy Google Safe Browsing v3 lookup API for self.url.

        Returns True when the API answers 204 (no threat found); raises on
        HTTP errors (status >= 400).

        NOTE(review): relies on a module-level `urlfetch` (App Engine
        style?) and a hard-coded API key — confirm both before use.
        """
        params = urllib.parse.urlencode({
            'client': 'api',
            'apikey': 'AIzaSyALJmgrWTmS3hvq3lxWzWnvo9FimxY-VAY',
            'appver': '1.5.2',
            'pver': '3.1',
            'url': self.url
        })
        url = "https://sb-ssl.google.com/safebrowsing/api/lookup?%s" % params
        res = urlfetch.fetch(url, method=1)
        if res.status_code >= 400:
            raise Exception("Status: %s" % res.status_code)
        # 204 No Content means the URL is on no threat list.
        return res.status_code == 204

    def get_category(self, name):
        """Category for rule *name* from self.data, or None when missing."""
        try:
            return self.data[name]['category']
        except:
            return None

    def get_from_queryset(self, rule, name):
        """Fetch self.data[rule][name], tolerating missing keys (None)."""
        try:
            return self.data[rule][name]
        except:
            return None

    def get_priority(self, name):
        """Priority for rule *name* from self.data, or None when missing."""
        try:
            return self.data[name]['priority']
        except:
            return None

    def get_message(self, name, status):
        """Message template for (name, status); falls back to the None-status
        template, then to an empty string."""
        for key in (status, None):
            try:
                return self.data[name][key]['message']
            except:
                continue
        return ''

    def format_message(self, name, status, args):
        """Render the template for (name, status), substituting each
        '{{arg}}' placeholder in order with the given args."""
        message = self.get_message(name, status)
        if message and args:
            for arg in args:
                replacement = arg if isinstance(arg, str) else str(arg)
                message = message.replace('{{arg}}', replacement, 1)
        return message

    def keyword_results(self):
        """Map each frequent keyword to its occurrence count."""
        return dict(self.get_most_common_keyword_count())

    def keyword_results_data(self, keywords):
        """Build the report entry for keyword frequencies.

        keywords: dict mapping keyword -> occurrence count; rendered as a
        clickable HTML list in 'msg_data'. Informational ('passed' is None).
        """
        if len(keywords) > 0:
            msg = '<ul class="list-group list-group-flush">'
            for key, value in keywords.items():
                msg += '<li onclick="prepare_modal(\'Keywords\', \'' + key + '\')" class="list-group-item list-group-item-light" name="hover-item"><small>' + key + ' - ' + str(
                    value) + ' times </small></li>'
            msg += '</ul>'
        else:
            msg = ''
        #self.modal_data['Keywords'] = json.dumps(self.format_keyword_data(keywords))
        #self.modal_data['Keywords'] = json.dumps([html.escape(el) for el in no_alt])
        return {
            'passed': None,
            'content': None,
            'length': 0,
            'name': 'keywords',
            'msg': self.format_message('Keywords', None, []),
            #'html': json.dumps(self.format_keyword_data(keywords)),
            'msg_data': msg,
            'category': self.get_from_queryset('Keywords', 'category'),
            'priority': self.get_from_queryset('Keywords', 'priority')
        }

    def is_valid_keyword(self, text):
        """A keyword is valid when it is not numeric and longer than one
        character."""
        try:
            float(text)
        except:
            return len(text) > 1
        return False

    def format_keyword_data(self, keywords):
        """Attach Google Trends data to each keyword.

        keywords: dict mapping keyword -> frequency.
        Side effect: stores the raw trends dicts in self.modal_keywords.

        BUG FIX: keyword_data() (several network round-trips through
        pytrends) was invoked twice per keyword; it is now fetched once
        and shared between both result structures.
        """
        keyword_data = {}
        keywords_data = {}
        for key, value in keywords.items():
            trend_data = self.keyword_data(key)
            keyword_data[key] = {'data': trend_data, 'freq': value}
            keywords_data[key] = trend_data
        self.modal_keywords = keywords_data
        return keyword_data

    def keyword_data(self, keyword, res='COUNTRY'):
        """Fetch Google Trends data for *keyword* via the shared pytrends
        client and return it as plain dicts (JSON-serialisable).

        res: resolution passed to interest_by_region (e.g. 'COUNTRY').
        """
        #pytrends = TrendReq(hl='en-US')
        self.pytrends.build_payload([keyword])
        data = {}
        dat = {}

        data['interest_over_time'] = self.pytrends.interest_over_time(
        ).to_dict()
        # Re-key the time series: pandas Timestamp keys -> datetime strings.
        for key, value in data['interest_over_time'].items():
            dat[key] = {}

            for k, v in value.items():
                dat[key][str(k.to_pydatetime())] = v

        data['interest_over_time'] = dat
        data['interest_by_region'] = self.pytrends.interest_by_region(
            resolution=res).to_dict()
        data['related_topics'] = self.pytrends.related_topics()
        data['related_queries'] = self.pytrends.related_queries()
        data['suggestions'] = self.pytrends.suggestions(keyword)

        # related_topics / related_queries map keyword -> DataFrame (or
        # None); convert each present frame to a dict, keeping None as-is.
        data['related_topics'][keyword] = data['related_topics'][
            keyword] if data['related_topics'][keyword] is None else data[
                'related_topics'][keyword].to_dict()
        data['related_queries'][keyword] = data['related_queries'][keyword]
        data['related_queries'][
            keyword]['top'] = data['related_queries'][keyword]['top'].to_dict(
            ) if data['related_queries'][keyword]['top'] is not None else None
        data['related_queries'][keyword]['rising'] = data['related_queries'][
            keyword]['rising'].to_dict() if data['related_queries'][keyword][
                'rising'] is not None else None

        return data
# ===== Example #15 =====
# Plot 'Chinese virus' search interest by region.
# NOTE(review): relies on `df` and `pytrend` created earlier in this
# example listing — not defined here.
df.reset_index().plot(x='geoName',
                      y='Chinese virus',
                      figsize=(120, 10),
                      kind='bar')
# The table is very sparse; keep only regions whose score meets a threshold.
df = df[(df['Chinese virus'] >= 20)]
df = df.sort_values(by=['Chinese virus'], ascending=False)  # sort the data
df.reset_index().plot(x='geoName',
                      y='Chinese virus',
                      figsize=(30, 10),
                      kind='bar')

#========================Daily Search Trends========================
# Get Google Hot Trends data
df = pytrend.trending_searches(pn='united_states')
df.head()
df = pytrend.today_searches(pn='US')  # For today's searches

#===========================Related Queries==========================
# Let us see what are the related queries for the topic "Coronavirus"
# When you want to change the topic name just run the following code again with the new name as the parameter
pytrend.build_payload(kw_list=['Chinese virus'])

# Related Queries, returns a dictionary of dataframes
related_queries = pytrend.related_queries()
related_queries.values()

# Related Topics, returns a dictionary of dataframes
related_topic = pytrend.related_topics()
related_topic.values()
# ===== Example #16 =====
# Export Google Trends rising related topics for each entry in `contories`
# (defined earlier; NOTE(review): the name looks like a typo of "countries").
for contory in contories:
    # Pass the keyword, category, timeframe and locale to Google Trends.
    pytrends.build_payload([contory],
                           cat=0,
                           timeframe='today 5-y',
                           geo='JP',
                           gprop='')

    # data1 = pytrends.related_queries()
    # data1[contory]['top'].to_csv('data/related_queries/top/' + contory + '.csv', encoding='utf_8_sig')
    # data1[contory]['rising'].to_csv('data/related_queries/raising/' + contory + '.csv', encoding='utf_8_sig')

    # data2 = pytrends.interest_over_time()
    # data2.to_csv('data/interest_over_time/' + contory + '.csv', encoding='utf_8_sig')

    # Save the rising related topics for this keyword as CSV.
    data3 = pytrends.related_topics()
    data3[contory]['rising'].to_csv('data/related_topics/' + contory + '.csv',
                                    encoding='utf_8_sig')

    # data2 = pytrends.related_queries()
    # print(data2)

    # Specify the data to extract, then actually extract it.
    # data = pytrends.related_queries()
    # data[contory]['top'].to_csv('data/related/Py_VS_R' + contory + '.csv', encoding='utf_8_sig')
    # Print the result to the console.
    # print(result1[contory]['top'].to_json)
    # for label, content in result1[contory]['top'].items():
    #     print('label:', label)
    #     print('content:', content.to_json, sep='\n')
# ===== Example #17 =====
# Fetch related topics for a Japanese keyword and dump them to disk.
from pytrends.request import TrendReq
pytrends = TrendReq(hl='ja-JP', tz=360)

# Python3 code to iterate over a list
kw_list = ["アメリカ"]

# Using for loop
# for keyword in kw_list:
# print(keyword)

# Last three months, Japan.
pytrends.build_payload(kw_list,
                       cat=0,
                       timeframe='today 3-m',
                       geo='JP',
                       gprop='')
# result = pytrends.interest_by_region(resolution='DMA', inc_low_vol=True, inc_geo_code=False)
# result = pytrends.related_topics()
date = '2020-04'
# NOTE(review): related_topics() returns a dict of DataFrames, not a
# DataFrame — the df.to_csv call below will fail as written; confirm intent.
df = pytrends.related_topics()
print(df)
# df.to_csv('data/' + 'keyword' + '.csv', encoding='utf_8')
compression_opts = dict(method='zip', archive_name='out.csv')
df.to_csv('out.zip', index=False, compression=compression_opts)
def main():
    """Pull several Google Trends reports for a keyword and write each to a
    (hard-coded, Windows-local) CSV file.

    Reports: all-time interest, one hour of hourly interest, interest by
    country, and rising/top related topics.
    """
    # Set up api wrapper
    pytrends = TrendReq(hl='en-US', tz=360)

    # Limit of 5 keywords
    kw_list = ["Steelcase"]

    # Build pipeline
    pytrends.build_payload(kw_list, cat=0, timeframe='all', geo='', gprop='')

    # Get overall interest over the entire timeline
    interestDF = pytrends.interest_over_time()
    interestDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrends5YearInterest_test.csv",
        index=True)
    print(interestDF.head())
    print()

    # Sleep 60 prevents you from being rate limited
    # Get hourly interest over the time set
    hourlyDF = pytrends.get_historical_interest(kw_list,
                                                year_start=2019,
                                                month_start=7,
                                                day_start=1,
                                                hour_start=0,
                                                year_end=2019,
                                                month_end=7,
                                                day_end=1,
                                                hour_end=1,
                                                cat=0,
                                                geo='',
                                                gprop='',
                                                sleep=60)
    hourlyDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrends5YearHourlyInterest_test.csv",
        index=True)
    print(hourlyDF.head())
    print()

    # Get regional interest across the world
    # Can switch to state or city specific
    regionDF = pytrends.interest_by_region(resolution='COUNTRY',
                                           inc_low_vol=True,
                                           inc_geo_code=False)
    regionDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrendsRegionInterest_test.csv",
        index=True)
    print(regionDF.head())
    print()

    # Get rising related topics
    risingDF = pytrends.related_topics().get('Steelcase').get('rising')
    risingDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrendsRisingRelated_test.csv",
        index=True)
    # Get top related topics
    topDF = pytrends.related_topics().get('Steelcase').get('top')
    topDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrendsTopRelated_test.csv",
        index=True)
    print(risingDF.head())
    print()
    print(topDF.head())
class Trendsetter():
    """Convenience wrapper around pytrends' TrendReq.

    Provides country presets, trending-search retrieval with best-effort
    English translation, category browsing, and interest / related-topic
    queries.
    """

    def __init__(self, timezone=1, language='en-US'):
        """
        Args:
            timezone: timezone offset in hours
            language: language of interface, not important
        """
        # pytrends expects tz as minutes west of UTC, hence the sign flip
        self.tz = -60 * timezone
        # country name -> [ISO country code, primary language code]
        self.countries = {
            'united_states': ['US', 'en'],
            'united_kingdom': ['GB', 'en'],
            'australia': ['AU', 'en'],
            'germany': ['DE', 'de'],
            'france': ['FR', 'fr'],
            'italy': ['IT', 'it'],
            'japan': ['JP', 'ja'],
            'saudi_arabia': ['SA', 'ar'],
            'egypt': ['EG', 'ar'],
            # 'china': ['CN', 'zh-cn'],
            # 'iran': ['IR', 'ar'],
            'brazil': ['BR', 'pt'],
            'india': ['IN', 'hi'],
            'israel': ['IL', 'iw'],
            # 'spain': ['ES', 'es'],
            'mexico': ['MX', 'es'],
            'russia': ['RU', 'ru'],
            'south_korea': ['KR', 'ko'],
            'taiwan': ['TW', 'zh-tw'],
            'hong_kong': ['HK', 'zh-tw'],
            'thailand': ['TH', 'th'],
            'turkey': ['TR', 'tr'],
            'vietnam': ['VN', 'vi'],
        }
        # reverse lookup: ISO country code -> country name
        self.countrycodes = {v[0]: k for k, v in self.countries.items()}
        self.trends = TrendReq(hl=language, tz=self.tz)
        # several mirrors to spread the load on the googletrans API
        self.translator = gt.Translator(service_urls=[
            "translate.google.com", "translate.google.co.kr",
            "translate.google.at", "translate.google.de",
            "translate.google.ru", "translate.google.ch",
            "translate.google.fr", "translate.google.es"
        ])

    def browse_categories(self, levels=None):
        """Browse the Google Trends category tree by a list of child indices.

        Args:
            levels: list of child indices to descend into, e.g. [4, 2];
                defaults to the tree root.  (Was a mutable default
                argument ``list()``; a None sentinel avoids the
                shared-default pitfall.)

        Returns:
            DataFrame with the child categories of the selected node, or
            None when the node has no children.
        """
        if levels is None:
            levels = []
        cat = self.trends.categories()
        for i in levels:
            cat = cat['children'][i]

        print(cat['name'], ", id =", cat['id'])
        if 'children' in cat.keys():
            children = pd.DataFrame.from_dict(cat['children'])
            return children

    def get_trending(self, country='united_states'):
        """
        get currently and daily trends for implemented countries

        Args:
            country: country name or country code

        Returns:
            {'trending': list, 'today': list}; for non-English countries,
            '*_en' translated variants are added when translation succeeds

        Raises:
            ValueError if country not supported
        """

        if country not in self.countries:
            if country in self.countrycodes:
                country = self.countrycodes[country]
            else:
                raise ValueError("Country not supported.")

        self.trending = {
            'trending':
            list(self.trends.trending_searches(pn=country)[0]),
            'today':
            list(self.trends.today_searches(pn=self.countries[country][0]))
        }

        if self.countries[country][1] != 'en':
            try:
                self.trending_en = {
                    k + '_en': list(
                        map(
                            lambda t: t.text,
                            self.translator.translate(
                                v, dest='en', src=self.countries[country][1])))
                    for k, v in self.trending.items()
                }
                self.trending.update(self.trending_en)
            except JSONDecodeError:
                warnings.warn("google translate API limit reached")
            except Exception:
                # translation is best-effort; never let it break the lookup
                # (narrowed from a bare except, which would also swallow
                # KeyboardInterrupt/SystemExit)
                warnings.warn("google translate API not working")

        return self.trending

    def get_related(self,
                    kw,
                    timeframe='now 7-d',
                    category=0,
                    location='',
                    gtype=''):
        """Return topics related to keyword *kw* as a single DataFrame.

        Args:
            kw: single keyword string
            timeframe: supported google format, or [t_start, t_end]
            category: google category id
            location: supported google location or country code
            gtype: google property filter

        Returns:
            DataFrame with the 'top' rows followed by the 'rising' rows
        """
        if isinstance(timeframe, list):
            tf_str = ' '.join(timeframe)
        else:
            tf_str = timeframe

        self.trends.build_payload([kw],
                                  cat=category,
                                  timeframe=tf_str,
                                  geo=location,
                                  gprop=gtype)
        related_topics = self.trends.related_topics()[kw]
        # DataFrame.append was deprecated and removed in pandas 2.0;
        # pd.concat is the drop-in replacement
        related_topics = pd.concat(
            [related_topics['top'], related_topics['rising']],
            ignore_index=True,
            sort=False)

        return related_topics

    def get_interest(self,
                     kwds,
                     timeframe='now 7-d',
                     category=0,
                     location='',
                     gtype=''):
        """Fetch interest-over-time data for up to 5 keywords.

        Args:
            kwds: keyword string or list of up to 5 keywords
            timeframe: supported google format. or [t_start, t_end]; for
                daily output: 'YYYY-mm-dd', for hourly output: 'YYYY-mm-ddThh'
            category: google category id
            location: supported google location or country code
            gtype: google property filter

        Returns:
            DataFrame
        """
        if isinstance(kwds, str):
            kwds = [kwds]

        if isinstance(timeframe, list):
            tf_str = ' '.join(timeframe)
        else:
            tf_str = timeframe
            timeframe = timeframe.split(' ')

        if 'T' in tf_str:  # hourly data
            format_str = '%Y-%m-%dT%H'
        else:  # daily data
            format_str = '%Y-%m-%d'

        # needs improvement: relative timeframes bypass the range parsing
        if any(s in tf_str for s in ['now', 'today', 'all']):
            self.trends.build_payload(kwds,
                                      cat=category,
                                      timeframe=tf_str,
                                      geo=location,
                                      gprop=gtype)
            self.interest = self.trends.interest_over_time()
        else:
            t_start = datetime.datetime.strptime(timeframe[0], format_str)
            t_end = datetime.datetime.strptime(timeframe[1], format_str)
            # long hourly ranges are stitched together request-by-request,
            # sleeping 60 s between requests to avoid rate limiting
            if 'T' in tf_str and t_end - t_start >= datetime.timedelta(days=8):
                self.interest = self.trends.get_historical_interest(
                    kwds,
                    year_start=t_start.year,
                    year_end=t_end.year,
                    month_start=t_start.month,
                    month_end=t_end.month,
                    day_start=t_start.day,
                    day_end=t_end.day,
                    hour_start=t_start.hour,
                    hour_end=t_end.hour,
                    cat=category,
                    geo=location,
                    gprop=gtype,
                    sleep=60)
            else:
                self.trends.build_payload(kwds,
                                          cat=category,
                                          timeframe=tf_str,
                                          geo=location,
                                          gprop=gtype)
                self.interest = self.trends.interest_over_time()
        return self.interest
# Exemple #20
# 0
def index():
    """Flask view: on POST, build a 30-day Google Trends report for the
    submitted keyword and render it.

    Reads the keyword from the 'name' form field, fetches interest over
    time plus related topics/queries for Japan, and returns 'choice.html'
    with the text lists and a base64-encoded PNG of the interest graph.
    """
    if request.method == 'POST':
        keyword = request.form['name']
        # today's date
        today = date.today()
        plt.clf()

        # 30 days ago
        day = today - timedelta(30)

        dt_now = datetime.now()

        # the microsecond string doubles as a (weak) unique CSV file name
        # NOTE(review): the temp CSV is never removed afterwards — confirm
        # whether cleanup is wanted
        dt_now_s = str(dt_now.microsecond)
        pytrends = TrendReq(hl='ja-JP', tz=360)
        kw_list = [keyword]
        pytrends.build_payload(kw_list,
                               cat=0,
                               timeframe=str(day) + ' ' + str(today),
                               geo='JP',
                               gprop='')
        df = pytrends.interest_over_time()  # fetch the time-series data
        df.to_csv(dt_now_s + ".csv", encoding='cp932')
        # related topics
        df = pytrends.related_topics()
        # top related topics
        try:
            text_ = df[keyword]['top'].loc[:, ['topic_title']].head(10)
            text__ = text_['topic_title']
            _text = '\n・'.join(text__)
            text = _text.replace('Name: topic_title, dtype: object', '')
        except Exception:
            # narrowed from a bare except; missing data falls back to "none"
            text = 'なし'
        # rising related topics
        try:
            text2_ = df[keyword]['rising'].loc[:, ['topic_title']].head(10)
            text2__ = text2_['topic_title']
            _text2 = '\n・'.join(text2__)
            text2 = _text2.replace('Name: topic_title, dtype: object', '')
        except Exception:
            text2 = 'なし'

        # related queries
        df = pytrends.related_queries()
        # top related queries
        try:
            text3_ = df[keyword]['top'].head(10)
            text3__ = text3_['query']
            _text3 = '\n・'.join(text3__)
            text3 = _text3.replace('Name: query, dtype: object', '')
        except Exception:
            text3 = 'なし'
        # rising related queries
        try:
            text4_ = df[keyword]['rising'].head(10)
            text4__ = text4_['query']
            _text4 = '\n・'.join(text4__)
            text4 = _text4.replace('Name: query, dtype: object', '')
        except Exception:
            text4 = 'なし'

        df = pd.read_csv(dt_now_s + '.csv', encoding='cp932')
        img = io.BytesIO()
        # build the interest-over-time graph into an in-memory PNG
        plt.figure()
        plt.figure(1)
        plt.plot(df['date'], df[keyword], marker="o")
        # axis labels
        plt.xlabel(df['date'].name)
        plt.ylabel(keyword)

        plt.savefig(img, format='png')
        img.seek(0)

        plot_url = base64.b64encode(img.getvalue()).decode()

        # embed the graph as a data URI so no static file is needed
        return render_template('choice.html',
                               text=text,
                               text2=text2,
                               text3=text3,
                               text4=text4,
                               img="data:image/png;base64,{}".format(plot_url))
# NOTE(review): 'pytrend' must be a TrendReq session created earlier in the
# file (not visible in this chunk); build_payload registers the keyword the
# queries below run against.
pytrend.build_payload(kw_list=['photography'])

# # Interest Over Time
# interest_over_time_df = pytrend.interest_over_time()
# print(interest_over_time_df.head())

# # Interest by Region
# interest_by_region_df = pytrend.interest_by_region()
# print(interest_by_region_df.head())

# # Related Queries, returns a dictionary of dataframes
# related_queries_dict = pytrend.related_queries()
# print(related_queries_dict)

# Related Topics, returns a dictionary of dataframes
# (keyed by keyword; other call sites in this file index the result as
# related_topics()[keyword]['top' / 'rising'])
related_topic_dict = pytrend.related_topics()
print(related_topic_dict)

# # Get Google Hot Trends data
# trending_searches_df = pytrend.trending_searches()
# print(trending_searches_df.head())

# # Get Google Hot Trends data
# today_searches_df = pytrend.today_searches()
# print(today_searches_df.head())

# # Get Google Top Charts
# top_charts_df = pytrend.top_charts(2018, hl='en-US', tz=300, geo='GLOBAL')
# print(top_charts_df.head())

# # Get Google Keyword Suggestions
# Exemple #22
# 0
class GoogleTrend(DataCollector):  # collects data through Google Trends
    """DataCollector implementation backed by pytrends (Google Trends).

    Holds a search configuration (keywords, locale, timeframe, ...) and
    exposes the pytrends queries, converting time-series and regional
    results into simple list structures for charting.
    """

    def __init__(self,
                 keyword=None,
                 hl='ko',
                 tz='82',
                 timeframe='today 5-y',
                 cat=0,
                 geo='KR',
                 gprop=''):  # constructor default settings
        """
        Args:
            keyword: list of search keywords; defaults to ['youtube'].
                (None sentinel replaces the original shared mutable
                default list.)
            hl: host language, e.g. 'ko', 'en-US'
            tz: time zone, e.g. 82 (Korea), 360 (US)
            timeframe: search period, e.g. 'today 5-y'
            cat: search category id
            geo: region code, e.g. 'KR'
            gprop: google property filter ('images', 'news', ...)
        """
        self.hl = hl
        self.tz = tz
        self.keyword = ['youtube'] if keyword is None else keyword
        self.timeframe = timeframe
        self.cat = cat
        self.geo = geo
        self.gprop = gprop
        self.update_pytrend()
        self.update_payload()

    # Login to Google. Only need to run this once, the rest of requests will use the same session.
    def update_pytrend(self):
        """(Re)create the pytrends session from the current hl/tz."""
        self.pytrend = TrendReq(hl=self.hl, tz=self.tz)

    # Create payload and capture API tokens. Only needed for interest_over_time(), interest_by_region() & related_queries()
    def update_payload(self):
        """Rebuild the search payload from the current configuration."""
        self.pytrend.build_payload(kw_list=self.keyword,
                                   cat=self.cat,
                                   timeframe=self.timeframe,
                                   geo=self.geo,
                                   gprop=self.gprop)

    def set_pytrend(self,
                    hl='None',
                    tz='None'):  # hl = host language, tz = time zone
        """Update session settings; the string 'None' means "keep current".

        (String sentinels are kept for backward compatibility with
        existing callers.)
        """
        if hl != 'None':  # ex) 'ko', 'en_US'
            self.hl = hl
        if tz != 'None':  # ex) 82: Korea, 360: US
            self.tz = tz
        self.update_pytrend()
        self.update_payload()

    def set_payload(self,
                    keyword=None,
                    timeframe='None',
                    cat=-1,
                    geo='None',
                    gprop='None'):  # keyword list, timeframe, category, region, google property
        """Update search settings; sentinels ('None' / -1) keep the
        current value."""
        if keyword is not None:
            self.keyword = keyword
        if timeframe != 'None':  # ex) 'all', 'today 5-y', 'today 1,2,3-m', 'now 1,7-d', 'now 1,4-H', '2018-05-20 2019-01-20'
            self.timeframe = timeframe
        if cat != -1:
            self.cat = cat
        if geo != 'None':  # ex) 'KR', 'US', ''
            self.geo = geo
        if gprop != 'None':  # ex) 'images', 'news', 'youtube', 'froogle'
            self.gprop = gprop
        self.update_payload()

    def load_data(self, keyword=None):
        """Dispatch helper: 'region' -> regional interest list,
        'gender' -> dummy gender split; anything else returns None."""
        if keyword == 'region':
            self.interest_by_region()
            return self.interest_by_region_df_to_list()
        elif keyword == 'gender':
            return self.search_rate_by_gender()

    # Interest Over Time
    def interest_over_time(self):
        """Fetch interest over time and return it in list form."""
        self.interest_over_time_df = self.pytrend.interest_over_time(
        )  # Returns pandas.Dataframe
        # keep one column per keyword, dropping the unused trailing
        # isPartial column
        self.interest_over_time_df = self.interest_over_time_df.iloc[
            :, :len(self.keyword)]
        self.interest_over_time_list = self.interest_over_time_df_to_list()
        return self.interest_over_time_list

    # Interest Over Time hourly
    def historical_hourly_interest(self):
        """Fetch hourly interest for a fixed sample window (2019-04/05)."""
        self.historical_hourly_interest_df = self.pytrend.get_historical_interest(
            keywords=self.keyword,
            year_start=2019,
            month_start=4,
            day_start=1,
            hour_start=0,
            year_end=2019,
            month_end=5,
            day_end=1,
            hour_end=0,
            cat=0,
            geo='KR',
            gprop='',
            sleep=0)  # Returns pandas.Dataframe
        # drop the unused trailing isPartial column
        self.historical_hourly_interest_df = self.historical_hourly_interest_df.iloc[
            :, :len(self.keyword)]
        self.historical_hourly_interest_list = self.historical_hourly_interest_df_to_list(
        )
        return self.historical_hourly_interest_list

    # Interest by Region
    def interest_by_region(self):  # search share per region
        """Fetch interest-by-region data and return it in list form."""
        self.interest_by_region_df = self.pytrend.interest_by_region()
        self.interest_by_region_list = self.interest_by_region_df_to_list()
        return self.interest_by_region_list

    # Related Topics, Returns dictionary of pandas.DataFrames
    def related_topics(self):  # related topics for the keywords, ranked
        self.related_topics_dict = self.pytrend.related_topics()
        return self.related_topics_dict

    # Related Queries, returns a dictionary of dataframes
    def related_queries(self):  # related queries for the keywords, ranked
        self.related_queries_dict = self.pytrend.related_queries()
        return self.related_queries_dict

    # trending searches in real time
    def trending_searches(self):  # top 20 trending searches right now
        self.trending_searches_df = self.pytrend.trending_searches(
            pn='south_korea')
        return self.trending_searches_df

    def today_searches(self):
        """Today's trending searches."""
        self.today_searches_df = self.pytrend.today_searches()
        return self.today_searches_df

    # Get Google Top Charts
    def top_charts(self):  # yearly top keywords
        self.top_charts_df = self.pytrend.top_charts(
            date=2015, hl='ko', tz='82', geo='KR'
        )  # date = YYYY integer, tz='82', geo='KR', geo='GLOBAL', geo='US'
        return self.top_charts_df

    # Get Google Category
    def categories(self):  # google category names and ids
        self.categories_df = self.pytrend.categories()
        return self.categories_df

    def show_interest_over_time(self):  # plot search share over time
        num = 0.0
        plt.figure(figsize=(14, 4))
        plt.style.use('ggplot')  # nicer plot style
        for key in self.keyword:
            num += 0.1  # step through the rainbow colormap per keyword
            plt.plot(self.interest_over_time_df[key],
                     c=plt.cm.rainbow(num),
                     label=key)
        plt.legend(bbox_to_anchor=(1, 1), loc=2)  # place the legend outside
        plt.show()

    def interest_over_time_df_to_list(self):
        """Convert interest_over_time_df into list form:
        [['x', date, ...], [keyword, value, ...], ...]."""
        date = self.interest_over_time_df.index.tolist()
        for i in range(len(date)):
            date[i] = date[i].date().strftime("%Y-%m-%d")
        # 'x' header row — presumably consumed by a c3.js-style chart;
        # verify against the front end
        date.insert(0, 'x')
        data = []
        data.append(date)
        for key in self.keyword:
            y = self.interest_over_time_df[key].tolist()
            y.insert(0, key)
            data.append(y)
        return data

    def historical_hourly_interest_df_to_list(self):
        """Convert historical_hourly_interest_df into list form:
        [['x', date, ...], [keyword, value, ...], ...]."""
        date = self.historical_hourly_interest_df.index.tolist()
        for i in range(len(date)):
            date[i] = date[i].date().strftime("%Y-%m-%d")
        date.insert(0, 'x')
        data = []
        data.append(date)
        for key in self.keyword:
            y = self.historical_hourly_interest_df[key].tolist()
            y.insert(0, key)
            data.append(y)
        return data

    def interest_by_region_df_to_list(self):
        """Convert interest_by_region_df into [[region_name, value], ...],
        aggregating provinces into larger Korean regions."""
        region = self.interest_by_region_df.index.tolist()
        data = []
        # NOTE(review): only the last keyword's column survives this loop
        # (y is overwritten each pass) — presumably intended for a
        # single-keyword search; verify for multi-keyword use
        for key in self.keyword:
            y = self.interest_by_region_df[key].tolist()
        # total share of the rows that map to the aggregated regions
        ratio = 0
        for i in [0, 1, 2, 3, 8, 11, 12, 13, 14, 15]:
            ratio += y[i]
        ratio /= 100
        tmp_val = 0
        reg_name = ''
        if ratio > 0:
            for i in range(len(region)):
                # indices pair up adjacent provinces into one region
                if i in [1, 2, 14, 11, 0, 13]:
                    if i == 0:
                        tmp_val = round(y[i] / ratio)
                        reg_name = '강원도'
                    elif i == 1:
                        tmp_val = round((y[i] + y[i + 1]) / ratio)
                        reg_name = '서울/경기'
                    elif i == 2:
                        tmp_val = round((y[i] + y[i + 1]) / ratio)
                        reg_name = '경상도'
                    elif i == 11:
                        tmp_val = round((y[i] + y[i + 1]) / ratio)
                        reg_name = '전라도'
                    elif i == 13:
                        tmp_val = round(y[i] / ratio)
                        reg_name = '제주도'
                    elif i == 14:
                        tmp_val = round((y[i] + y[i + 1]) / ratio)
                        reg_name = '충청도'
                    data.append([reg_name, tmp_val])
        return data

    def search_rate_by_gender(self):
        """Return placeholder gender data (random values, not real)."""
        gender_data = []
        gender_data.append(['male', random.randint(50, 100)])
        gender_data.append(['female', random.randint(50, 100)])
        return gender_data
class DesignerTrendsCollector(BuilderTrendsCollector):
    """Concrete trends collector built on the external `pytrends` library.

    Implements every request declared by `BuilderTrendsCollector`; each
    ``get_*`` method performs one pytrends request and stores the result
    in the current product under the bound method itself as the key.

    Args:
        BuilderTrendsCollector (class): Abstract class that declares the
            properties and methods implemented here.
    """
    def __init__(
        self,
        keyword_list: list,
        timeframe: str = "today 5-y",
        language: str = "en-US",
        category: int = 0,
        timezone: int = 360,
        country: str = "",
        property_filter="",
        **kwargs,
    ) -> None:
        """Create the pytrends session and prepare the search payload.

        Args:
            keyword_list (list): Keywords to search for.
            timeframe (str, optional): Period to search over. Defaults to "today 5-y".
            language (str, optional): Search language. Defaults to "en-US".
            category (int, optional): Specific [search category](https://github.com/pat310/google-trends-api/wiki/Google-Trends-Categories). Defaults to 0.
            timezone (int, optional): [Search timezone](https://developers.google.com/maps/documentation/timezone/overview). Defaults to 360.
            country (str, optional): Country to search in. Defaults to "".
            property_filter (str, optional): Restrict the search to news, images, YouTube or shopping. Defaults to "".
        """
        self.keyword_list = keyword_list
        self.language = language
        self.timezone = timezone
        self.timeframe = timeframe
        self.category = category
        self.country = country
        self.property_filter = property_filter

        self.pytrends = TrendReq(hl=self.language, tz=self.timezone, **kwargs)
        self.pytrends.build_payload(
            kw_list=self.keyword_list,
            cat=self.category,
            timeframe=self.timeframe,
            geo=self.country,
            gprop=self.property_filter,
        )
        self.reset()

    def reset(self) -> None:
        """Discard any collected data and start with an empty product."""
        self._product = TrendProduct()

    @property
    def trends(self) -> TrendProduct:
        """Hand out the collected results and reset for the next run.

        Returns:
            TrendProduct: Container holding the result dictionary.
        """
        finished = self._product
        self.reset()
        return finished

    def get_interest_over_time(self) -> None:
        """Collect interest-over-time data."""
        result = self.pytrends.interest_over_time()
        self._product.add_product(key=self.get_interest_over_time,
                                  value=result)

    def get_interest_by_region(self, resolution: str, **kwargs) -> None:
        """Collect interest-by-region data.

        Args:
            resolution (str): The resolution of the subregion.
        """
        result = self.pytrends.interest_by_region(resolution=resolution,
                                                  **kwargs)
        self._product.add_product(key=self.get_interest_by_region,
                                  value=result)

    def get_trending_searches(self, trend_country: str) -> None:
        """Collect trending searches for one country.

        Args:
            trend_country (str): Name of the country of interest.
        """
        result = self.pytrends.trending_searches(pn=trend_country)
        self._product.add_product(key=self.get_trending_searches,
                                  value=result)

    def get_today_searches(self, today_country: str) -> None:
        """Collect today's daily search trends.

        Args:
            today_country (str): Name of the country of interest.
        """
        result = self.pytrends.today_searches(pn=today_country)
        self._product.add_product(key=self.get_today_searches, value=result)

    def get_top_charts(self, date: int, top_country: str) -> None:
        """Collect top-charts data for one year and country.

        Args:
            date (int): Year.
            top_country (str): Name of the country of interest.
        """
        result = self.pytrends.top_charts(date,
                                          hl=self.language,
                                          tz=self.timezone,
                                          geo=top_country)
        self._product.add_product(key=self.get_top_charts, value=result)

    def get_related_topics(self) -> None:
        """Collect topics related to the configured keywords."""
        result = self.pytrends.related_topics()
        self._product.add_product(key=self.get_related_topics, value=result)

    def get_related_queries(self) -> None:
        """Collect queries related to the configured keywords."""
        result = self.pytrends.related_queries()
        self._product.add_product(key=self.get_related_queries, value=result)

    def get_suggestions(self) -> None:
        """Collect keyword-suggestion dropdown data, one entry per keyword."""
        suggestion_map = {
            keyword: self.pytrends.suggestions(keyword=keyword)
            for keyword in self.keyword_list
        }
        self._product.add_product(key=self.get_suggestions,
                                  value=suggestion_map)

    def get_categories(self) -> None:
        """Collect the categories available for the current search."""
        result = self.pytrends.categories()
        self._product.add_product(key=self.get_categories, value=result)

    def get_historical_interest(
        self,
        year_start: int,
        month_start: int,
        day_start: int,
        hour_start: int,
        year_end: int,
        month_end: int,
        day_end: int,
        hour_end: int,
        **kwargs,
    ) -> None:
        """Collect hour-resolution interest between two timestamps.

        Args:
            year_start (int): Starting year
            month_start (int): Starting month
            day_start (int): Starting day
            hour_start (int): Starting hour
            year_end (int): Final year
            month_end (int): Final month
            day_end (int): Final day
            hour_end (int): Final hour
        """
        result = self.pytrends.get_historical_interest(
            keywords=self.keyword_list,
            year_start=year_start,
            month_start=month_start,
            day_start=day_start,
            hour_start=hour_start,
            year_end=year_end,
            month_end=month_end,
            day_end=day_end,
            hour_end=hour_end,
            cat=self.category,
            geo=self.country,
            gprop=self.property_filter,
            **kwargs,
        )
        self._product.add_product(key=self.get_historical_interest,
                                  value=result)
# Exemple #24
# 0

# Standalone pytrends demo: run one query for 'Programming' and print each
# kind of result, separated by dashed lines.
pytrends = TrendReq(hl='en-US', tz=360)

key_words = ['Programming']

pytrends.build_payload(key_words)

# interest over time for the keyword
data_frame1 = pytrends.interest_over_time()
print(data_frame1.head())
print('---------------')

# interest split per country
data_frame2 = pytrends.interest_by_region(resolution='COUNTRY')
print(data_frame2.head())
print('---------------')

# dictionary of related-topic dataframes, keyed by keyword
data_frame3 = pytrends.related_topics()
print(data_frame3)
print('---------------')

# current trending searches in Ukraine
data_frame4 = pytrends.trending_searches(pn='ukraine')
print(data_frame4.head())
print('---------------')

# top charts for a year; NOTE(review): other call sites in this file pass
# the year as an int (e.g. 2018) — confirm the string '2020' is accepted
data_frame5 = pytrends.top_charts(date='2020')
print(data_frame5.head())
print('---------------')

# keyword suggestions for 'UCU'
data_frame6 = pytrends.suggestions('UCU')
print(data_frame6)
 def test_related_topics(self):
     """Smoke test: related_topics() on a two-keyword payload yields a
     non-None result."""
     trend = TrendReq()
     trend.build_payload(kw_list=['pizza', 'bagel'])
     self.assertIsNotNone(trend.related_topics())
def GetRelatedTopics(request):
    """Voice-assistant intent handler: speak topics related to the requested keyword.

    Reads the KEYWORD slot from the intent, fetches Google Trends related
    topics for it, and returns a plain-text response built from a randomly
    chosen answer template. Falls back to an apology response on any fetch
    error, and to a keyword prompt when the slot is absent.
    """
    pytrend = TrendReq()
    keyword = getSlotValue(request['intent'], 'KEYWORD')

    # Guard clause: no KEYWORD slot supplied - ask the user for one.
    if keyword == -1:
        return keywordRequired()

    try:
        pytrend.build_payload(kw_list=[keyword])
        df = pytrend.related_topics()[keyword]
    except Exception as e:
        print(type(e))
        print(e.args)
        ErrorMessages = [
            "Google is not very helpful sometimes, this is so embarrasing.",
            "Sorry for the inconvenience.",
            "This is so embarrasing that I can't help.",
            "Something wrong happened!",
            "There was an error fetching the details.",
            "There was a problem retrieving the data.",
            "Nobody is perfect, atleast I tried.",
            "Sorry for such an absurd behaviour. I can't help.",
            "There was an error",
            "Error's happen. That's what happened right now.",
            "An error occurred!",
            "Keep calm, light a fire and try again.",
            "The operation couldn't be completed."
        ]
        temp = getRandom(ErrorMessages)
        # NOTE(review): 'attributes' is not defined in this function - presumably
        # a module-level global; confirm it exists, otherwise this error path
        # itself raises NameError.
        return response_plain_text(
                temp,
                True,
                attributes,
                "Our apologies",
                "Sorry for the inconvenience",
                "What can I do for you?"
            )

    # Success path: rank topics by relevance, then shuffle so repeated asks vary.
    topics_df = df.sort_values(by='value', ascending=False)
    topics = [row['title'] for _, row in topics_df.iterrows()]
    random.shuffle(topics)

    # Speak at most five topics; helper also yields card text and a follow-up.
    length = 5
    outputSpeech, cardContent, extra = getOSandCC(topics, length, True)
    AnswerMessages = [
        "Here are the topics related to ",
        "Here's what I have for you regarding ",
        "Here are some top topics related to ",
        "Here are the best topics related to ",
        "This is what I've got related to ",
        "Yes, I've got something for you regarding ",
        "Best topics in the category of "
    ]
    temp = getRandom(AnswerMessages)
    return response_plain_text(
            temp + keyword + " ... " + outputSpeech,
            True,
            {},
            "Hot topics related to - " + keyword,
            cardContent,
            "Would you like to hear about " + extra
        )
Exemple #27
0
 def test_related_topics(self):
     """Verify the related-topics endpoint yields a non-None result for two terms."""
     client = TrendReq()
     client.build_payload(kw_list=["pizza", "bagel"])
     payload = client.related_topics()
     self.assertIsNotNone(payload)
Exemple #28
0
                # Related queries for the current polling window.
                # NOTE(review): comments said "1 Hour" but the CSVs are named
                # "...1Day.csv" and the loop sleeps 3600 s per pass - confirm
                # which period is actually intended.
                rq_ = pytrends3.related_queries()
                q = 'query'
                v = 'value'
                # NOTE(review): each iteration overwrites the same four columns,
                # so when several keywords were queried only the last one's data
                # survives in df_rq_.
                for keys in rq_.keys():
                    df_rq_['Top Queries'] = rq_[keys]['top'][q]
                    df_rq_['Top Query Value '] = rq_[keys]['top'][v]
                    df_rq_['Rising Query'] = rq_[keys]['rising'][q]
                    df_rq_['Rising Query Value'] = rq_[keys]['rising'][v]
                # Overwrite (mode='w') the snapshot file with no header/index rows.
                df_rq_.to_csv("Google_Trends/RelatedQueries1Day.csv",
                              header=False,
                              mode='w',
                              index=False)

                # Related topics for the same window; same last-key-wins caveat
                # as the related-queries loop above.
                ts_ = pytrends3.related_topics()
                df_ts_ = pd.DataFrame(columns=['Title', 'Value', 'Mid'])
                for keys in ts_.keys():
                    df_ts_['Title'] = ts_[keys].title
                    df_ts_['Value'] = ts_[keys].value
                    df_ts_['Mid'] = ts_[keys].mid
                df_ts_.to_csv("Google_Trends/RelatedTopics1Day.csv",
                              header=False,
                              mode='w',
                              index=False)

                counter += 1
                print("Hour", counter)

                # Sleep one hour before polling again.
                time.sleep(3600)
    year_start=2022, month_start=1, day_start=1, hour_start=0,
    year_end=2022, month_end=2, day_end=10, hour_end=23,
)
data

# Keyword whose Google Trends data we want to pull (over all available history).
keyword = "python"
pt.build_payload([keyword], timeframe="all")

# Per-country interest, including low-volume regions and ISO geo codes.
region_interest = pt.interest_by_region("COUNTRY", inc_low_vol=True, inc_geo_code=True)

# Rank countries from most to least interested (notebook-style bare expression).
region_interest[keyword].sort_values(ascending=False)

# Topics Google associates with the keyword.
topics = pt.related_topics()
topics[keyword]["top"]

# Queries users also searched alongside the keyword.
queries = pt.related_queries()
queries[keyword]["top"]

# Autocomplete-style suggestions, for two sample terms.
pt.suggestions("python")

pt.suggestions("America")

# What is currently trending in the United Kingdom.
trending = pt.trending_searches(pn="united_kingdom")
trending
Exemple #30
0
                query_json = json.loads(line.rstrip())
                keyword = query_json['keyword']
                gt_queries = query_json['gt_queries']

                # Skip keywords already handled in a previous run.
                # if current keyword exists in visited query, skip current keyword
                if len(visited_query) > 0 and keyword in visited_query:
                    continue

                # Resolve a topic id only for records that do not have one yet.
                # get the topic id if no topic exists
                if 'topic_id' not in query_json:
                    trends_crawler = TrendReq()
                    # NOTE(review): standard pytrends build_payload takes kw_list=,
                    # not keyword= - confirm this TrendReq is a customized crawler.
                    trends_crawler.build_payload(keyword=gt_queries,
                                                 timeframe=ALL_PERIOD,
                                                 gprop=GPROP)
                    # NOTE(review): consumed below as a list of dicts with
                    # 'type'/'value'/'mid' keys; recent pytrends returns a dict of
                    # DataFrames instead - verify the library version in use.
                    related_topics_list = trends_crawler.related_topics()

                    # Keyword is formatted "<artist> - <title>"; split on first dash.
                    artist1, title1 = keyword.split(' - ', 1)
                    artist1 = reformat(artist1)
                    title1 = reformat(title1)

                    topic_id = None
                    # select the song mid with the highest relevant score from a list of related topics
                    for topic_quad in related_topics_list:
                        type = topic_quad['type']  # NOTE(review): shadows the builtin 'type'
                        value = topic_quad['value']
                        # return the first song mid
                        if type.startswith('Song by'):
                            if value > 40:
                                topic_id = topic_quad['mid']
                                query_json.update(topic_quad)