Code Example #1
File: scraper.py Project: jirmauritz/websci
def scrape():
    # load queries
    hpv_queries = load_file('hpv_queries.txt').split('\n')
    mfr_queries = load_file('mfr_queries.txt').split('\n')

    # connect to Google
    trends = pyGTrends(USER, PASS)

    # scrape hpv
    hpv_matrix = []
    for query in hpv_queries:
        trends.request_report(keywords=query, date='01/2011 60m', geo='DK')
        raw = trends.get_data()
        hpv_matrix.append(parse_counts(raw))
    hpv_matrix = np.array(hpv_matrix)
    hpv_matrix = np.transpose(hpv_matrix)
    np.savetxt(DATA_DIR + 'hpv_data.txt', hpv_matrix, fmt='%i', delimiter=",")
    # scrape mfr
    mfr_matrix = []
    for query in mfr_queries:
        trends.request_report(keywords=query, date='01/2011 60m', geo='DK')
        raw = trends.get_data()
        mfr_matrix.append(parse_counts(raw))
    mfr_matrix = np.array(mfr_matrix)
    mfr_matrix = np.transpose(mfr_matrix)
    np.savetxt(DATA_DIR + 'mfr_data.txt', mfr_matrix, fmt='%i', delimiter=",")
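The scraper above leans on two helpers that the excerpt does not include. A minimal sketch of what they might look like, assuming load_file simply returns the file's contents and parse_counts keeps the weekly counts from the raw report text returned by get_data() (both bodies are guesses based on how their results are used above):

def load_file(name):
    # return the file's contents as one string; scrape() splits it on newlines
    with open(name) as f:
        return f.read()

def parse_counts(raw):
    # keep the count column of every "date,count" row in the raw report
    counts = []
    for line in raw.split('\n'):
        parts = line.split(',')
        if len(parts) == 2 and parts[1].strip().isdigit():
            counts.append(int(parts[1]))
    return counts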
Code Example #2
File: get_trends.py Project: jpforny/pytrends
def get_day_trends():
    
    time.sleep(float(initial_sleep))
    
    start = time.time()
    
    # Google Trends client
    logger.info("Connecting to Google")
    connector = pyGTrends('', '')
    logger.info("Connected to Google")
    
    df = pd.DataFrame()
    
    counter = 0
    
    # stop collecting at 21:00 today
    kill_at = pd.Timestamp(strftime("%Y-%m-%d 21:00:00"))
    
    spent_time_connection = time.time() - start - (start % 1)
    
    try:
        while True:

            start = time.time()
            
            now = pd.Timestamp(ctime())
            if now > kill_at:
                logger.info("Killing job...")
                break
            
            try:    
                current_trends = get_trends(connector)
                df = df.append(current_trends)
                counter += current_trends.shape[0]
            except Exception:
                logger.error("Error requesting latest trends: {}".format(str(sys.exc_info()[0])))
                logger.error(traceback.format_exc())
            
            # Print a status message
            if counter % 1000 == 0:
                logger.info("{} stories fetched".format(counter))
            
            spent_time = time.time() - start 
            
            logger.info("Current job took {}\n".format(spent_time))
            
            if spent_time_connection > 0:
                # never pass a negative duration to sleep
                time.sleep(max(0, 60 - spent_time - spent_time_connection))
                spent_time_connection = 0
            else:
                time.sleep(max(0, 60 - spent_time - (start % 1)))
    except:
        logger.error("Exiting: " + str(sys.exc_info()[0]))
        logger.error(traceback.format_exc())

    beginning = str(df.iloc[0]['timestamp'])
    end = str(df.iloc[-1]['timestamp'])
    df.index.name = 'rank'
    df.to_csv("trends({})[{}, {}].csv".format(hl_param, beginning, end))
    
    logger.info("Saved day trends from {} to {}".format(beginning, end))
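get_trends, initial_sleep, and hl_param are defined elsewhere in get_trends.py. As a rough idea of the contract the loop depends on, here is a hypothetical stand-in for get_trends that returns a DataFrame carrying the timestamp column the caller indexes (the request keyword and the parsing are guesses, not the project's code):

import pandas as pd
from time import ctime

def get_trends(connector):
    # hypothetical: fetch a report, keep the non-empty rows, and stamp
    # each one with the fetch time
    connector.request_report('trending now', hl='en-US')
    rows = [line for line in connector.get_data().split('\n') if line]
    df = pd.DataFrame({'title': rows})
    df['timestamp'] = pd.Timestamp(ctime())
    return df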
Code Example #3
def google_trend_crawler(name):
    #suggestions = {}
    google_username = "******"
    google_password = "******"
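    # normalize the article name: underscores to spaces, drop "():" characters,
    # undo URL-escaping, then re-encode the text as cp1252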
    title = name.replace('_', ' ')
    title = title.translate(None, '():')
    title = urllib.unquote(title)
    title = title.decode('utf-8')
    title = title.encode("cp1252")

    
    connector = pyGTrends(google_username, google_password)
    connector.request_report(title, hl='en-US')
    suggestions = connector.get_suggestions(title)
    if suggestions['default']['topics'] != []:
        title = suggestions['default']['topics'][0].get('mid')
        # wait a random amount of time between requests to avoid bot detection
        time.sleep(randint(5, 10))
        print title
        connector.request_report(title, hl='en-US')
        name = name.translate(None, ':*\'')
        name = name.decode("cp1252")
        name = urllib.quote_plus(name.encode("utf-8"))
        connector.save_csv(scientisis_dir, name)
    else:
        with open(errorfile, "a") as myfile:
            myfile.write("%s\n" % name)
    return
Code Example #4
	def __init__(self, searchterms):
		self.trends = []
		self.trends.append(searchterms)
		con = pyGTrends("trendsscraper123", "googleTrends")
		con.request_report(self.formatSearchTerms(searchterms), date = "today 7-d")
		data = con.get_data()
		self.trends += self.processScore(data)
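formatSearchTerms and processScore are other methods on the same class, not shown in this excerpt. A hypothetical sketch of their likely shape, assuming the former joins the terms into the comma-separated keyword string request_report accepts and the latter pulls the numeric scores out of the report rows:

	def formatSearchTerms(self, searchterms):
		# Google Trends compares up to five comma-separated keywords
		return ", ".join(searchterms[:5])

	def processScore(self, data):
		# hypothetical: keep the numeric score from each "date,score" row
		rows = [r.split(",") for r in data.split("\n")]
		return [int(r[1]) for r in rows if len(r) == 2 and r[1].strip().isdigit()]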
Code Example #5
    def __init__(self, file_paths, credentials):
        # Set file paths
        self.file_paths = file_paths
        wb_filepath = os.path.abspath(file_paths['input_file'])
        # Read an input workbook
        self.in_workbook = openpyxl.load_workbook(filename=wb_filepath)
        # Read country codes.
        self.country_codes = self.get_country_codes(file_paths['country_codes_file'])
        # Connect to Google
        self.g_connector = pyGTrends(credentials['google_username'], credentials['google_password'])
Code Example #6
File: gtrend.py Project: yuantuo/DataExample
    def __init__(self):
        # connect to Google
        try:
            self.google_username = "******"
            self.google_password = "******"
            self.connector = pyGTrends(self.google_username, self.google_password)
            time.sleep(randint(5, 10))
        except Exception:
            print('I am unable to connect to google trends.')
            sys.exit(-1)
Code Example #7
def get_trend_score(query, horizon=52, trail=3):
    try:
        connector = pyGTrends(_GNAME, _GPASS)
        connector.request_report(query)
        sleep(5)

        stock_data = parse_data(connector.decode_data)
        trend = TrendAnalysis(query, stock_data)
        trend.set_stats()
        return trend
    except Exception as e:
        _log.error(e)
        return "N/A"
Code Example #8
    def __init__(self, path, startyear):
        self.startyear = startyear

        # ADD YOUR ACCOUNT INFOS
        self.google_username = ""
        self.google_password = ""

        if not os.path.exists(path):
            os.mkdir(path)
        self.logfilename = path + "log-fails.txt"

        self.connector = pyGTrends(self.google_username, self.google_password)
        self.path = path
Code Example #9
def main():

    # join every command-line argument into the search name
    name = sys.argv[1]
    for argument in sys.argv[2:]:
        name += " " + argument

    newpath = "GoogleTrendsData/" + name
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    path = "GoogleTrendsData/"
    path += name + "/"
    csv_name = name + "_trend"
    cleaned_csv_name = name + "_trend_cleaned.csv"

    # connect to Google
    connector = pyGTrends(google_username, google_password)

    # make request
    connector.request_report(name)

    # wait a random amount of time between requests to avoid bot detection
    time.sleep(randint(5, 10))

    # download file
    connector.save_csv("../../src/main/resources/static/", "google_trend")

    with open("../../src/main/resources/static/google_trend.csv") as in_file:
        csv_reader = csv.reader(in_file)

        # skip the metadata rows above the weekly data
        for i in range(5):
            next(csv_reader)
        dates_mapping = defaultdict(int)
        for line in csv_reader:
            if not line:
                break
            dateString = line[0]
            dates = dateString.split(' - ')
            start_dates = dates[0].split('-')
            start_year = start_dates[0]
            start_month = start_dates[1]
            dates_mapping[start_year + "/" + start_month] += int(line[1])

        with open("../../src/main/resources/static/google_trend_cleaned.csv", "w") as out_file:
            csv_writer = csv.writer(out_file)
            csv_writer.writerow(['time', 'quantity'])
            for item in sorted(dates_mapping.items()):
                csv_writer.writerow([item[0], item[1]])
Code Example #10
    def generateCSV(self):
        google_username = "******"
        google_password = "******"
        path = "/Users/AnnaGupta/hack-cmu-2015/pytrends-master/examples/"

        # connect to Google
        connector = pyGTrends(google_username, google_password)

        # make request
        connector.request_report("%s" % self.word, hl="en-US", cat=None, geo="US", date=None)

        # wait a random amount of time between requests to avoid bot detection
        time.sleep(randint(5, 10))

        # download file
        connector.save_csv(path, self.word)
Code Example #11
	def generateCSV(self):
		google_username = "******"
		google_password = "******"
		path = os.getcwd()+"/"

		# connect to Google
		connector = pyGTrends(google_username, google_password)

		# make request
		connector.request_report("%s" % self.word, hl='en-US', cat=None, geo='US', date="today 7-d")

		# wait a random amount of time between requests to avoid bot detection
		time.sleep(randint(5, 10))

		# download file
		connector.save_csv(path, "data")
Code Example #12
def mongo_write():
    client = pymongo.MongoClient('localhost', 27017)
    db = client['googletrends']
    collection = db['genre_query_history']
    # path = ""
    # results=get_all_freebase_genres()
    # print results
    count = 0
    # # # print len(results["results"]["bindings"])
    for result in collection.find():
        count += 1
        print count
        # print result
        # break
        fid = result['freebase_id']
        # fid='/m/0bkbm'
        print fid
        if 'query_overtime' not in result:
            continue
        if 'query_us_states' in result:
            continue

        # connect to Google
        connector = pyGTrends(google_username, google_password)

        # make request
        connector.request_report(fid, geo='US')
        print
        res_json = csv2json(connector.decode_data.split('\n'))
        res_json['freebase_id'] = fid
        # print res_json#['query_regions']

        # collection.find_one({'query_title': fid}).update()

        # if collection.find_one({'query_title': fid}) == None:
        #     collection.insert_one(res_json)
        if 'query_us_states' in res_json:
            genre = collection.find_one({'freebase_id': fid})
            # print genre
            genre['query_us_states'] = []
            genre['query_us_states'] = res_json['query_us_states']
            collection.save(genre)
            # break
        # wait a random amount of time between requests to avoid bot detection
        time.sleep(randint(5, 10))
Code Example #13
def get_top_date(actor_name, peak_num):

    google_username = "******"
    google_password = "******"

    fid = es_dao.get_actor_freebase_id_by_name(actor_name)
    print fid
    connector = pyGTrends(google_username, google_password)

    # make request
    connector.request_report(fid)
    res_json = csv2json(connector.decode_data.split('\n'))
    res_json['freebase_id'] = fid
    #print "Here is result in json:", res_json
    overtime_data = []
    timestamp_start = []
    timestamp_end = []
    for weekly_data in res_json['query_overtime']:
        overtime_data.append(int(weekly_data['querycount']))
        timestamp_start.append(weekly_data['starttime'])
        timestamp_end.append(weekly_data['endtime'])
    #print res_json['query_overtime']
    # print timestamp
    # print overtime_data

    sorted_data = sorted(range(len(overtime_data)),
                         key=lambda i: overtime_data[i])[-peak_num:]
    # print sorted_data

    for index in sorted_data:
        print "date:{}, count{}".format(timestamp_start[index],
                                        overtime_data[index])

    # array_data = np.asarray(overtime_data)
    # peakind =signal.find_peaks_cwt(array_data, np.arange(5,10))
    # print peakind

    # plt.plot(overtime_data)
    # plt.show()

    # wait a random amount of time between requests to avoid bot detection
    time.sleep(randint(5, 10))
    return sorted_data, overtime_data, timestamp_start, timestamp_end
Code Example #14
    def run(self):
        google_username = "******"
        google_password = "******"
        path = "/home/vagner/workspace/ITSGoogleTrends/output/"

        # connect to Google
        try:
            self.show("Realizando conexão com usuário " + google_username)
            connector = pyGTrends(google_username, google_password)
            self.show("Conexão realizada com sucesso")
        except Exception as e:
            raise ITSGoogleTrendsError("Erro durante a conexão com o Google.")
        # building the request string

        # reading the data from the CSV file
        table = rows.import_from_csv(self._CSV_FILE_PATH + self._CSV_FILE_NAME)

        rows_number = len(table)
        index = 0

        for row in table:
            its_name = str(row.system).lower()
            index = index + 1
            self.show("Início da busca dos dados de tendência para o ITS: {0:s} [{1:d}/{2:d}]".format(its_name,index,rows_number))
            str_request =  self._DEFAUT_KEYWORD + "," + its_name

            self.show("Realizando uma requisição com a sentença " + str_request)
            # make request
            connector.request_report(str_request)

            # download file
            self._now = str(datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))
            csv_file_name = "{0:s}-{1:s}".format(self._now, str_request.replace(",", "-").replace(" ","-"))
            connector.save_csv(path, csv_file_name)
            self.show("Resultados escritos no arquivos {0:s}.csv".format(csv_file_name))

            # wait a random amount of time between requests to avoid bot detection
            wait_time = randint(5, 10)
            self.show("Aguardando {0:d} segundos para uma nova requisição".format(wait_time))
            time.sleep(wait_time)
        #end for
        self.show("Execução realizada com sucesso!")
Code Example #15
File: GoogleTrends.py Project: dekedor/AtRiskWiki
def GetGoogleTrends(subject, page_id, deleteTempFile = False):
    """
    Grabs data from Google Trends for a given topic.
    Creates a temporary csv file that can optionally be deleted after use.
    inputs
        subject: topic to search (string)
        page_id: id attached to every row of the output
        deleteTempFile: if True, deletes the temporary file after use
    output
        DataFrame of the number of searches for each week
    """
    subject = re.sub(',', '', subject)
    google_username = "******"
    google_password = "******"
    path = '%s_temp.csv' % subject
    connector = pyGTrends(google_username, google_password)
    connector.request_report(subject)
    data = connector.decode_data
    # so... data is a string intended to become a csv file
    # the first four lines are useless and need to be removed
    # there is likely a better way to do this than what I do below, but it works
    data = re.sub('Web.*', '', data)
    data = re.sub('World.*', '', data)
    data = re.sub('World.*', '', data)
    data = re.sub('Interest.*', '', data)
    data = re.sub('\n\n\n\n', '', data)
    data = re.split('.*, \n', data)[0]
    data = re.split('\n\n', data)[0]
    # write temporary file then open in pandas... this is just easier, but might be a better way
    f = open(path, 'w+')
    f.write(data)
    f.close()
    try:
        df = pd.read_csv(path)
    except Exception:
        return None
    df.columns = ['Week', 'Searches']
    df['Week'] = [re.sub('-', '', re.split(' - ', week)[0]) for week in df['Week']]
    df['page_id'] = [page_id for week in df['Week']]
    df = df[['page_id', 'Week', 'Searches']]
    if deleteTempFile:
        os.remove(path)
    return df
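A call might look like this (the subject and page id here are made up):

searches = GetGoogleTrends("Influenza", 42, deleteTempFile=True)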
Code Example #16
File: googleTrends.py Project: klee97/magic-master
def main():
    googleUsername = "******"
    googlePassword = "******"
    
    #connect to Google
    connector = pyGTrends(googleUsername, googlePassword)
    
    with open("listFinal.txt", "r") as f:
        
        #creates array each containing a card name as a string
        cards = [line.rstrip('\n') for line in f]
        
        for card in cards:
            connector.request_report(card, geo="US", date="today 90-d")
            connector.save_csv("./output/", card)
            
            #so that google trends doesn't get suspicious of scripting...
            time.sleep(randint(4,8))

    path = "./output"

    os.chdir(path)


    #gets rid of all the junk in the csv file
    for file in glob.glob("*.csv"):
        
        with open(file, "r") as f:
            lines = f.readlines()

        # drop the header rows and keep 90 days of data
        del lines[0:5]
        lines = lines[0:90]

        # rewrite the file with only the cleaned rows
        with open(file, "w") as f:
            for line in lines:
                f.write(line)
Code Example #17
def main(keywords):
    google_username = "******"
    google_password = "******"
    path = ""
    # regions = ['US-AK-743', 'US-AK-745', 'US-AK-747']
    regions = ['US-AK-743']

    # connect to Google
    connector = pyGTrends(google_username, google_password)

    print "connected"

    # make request
    for region in regions:
        # restart the sample count for each region
        sample_counter = 1
        while sample_counter < 10:
            connector.request_report(keywords, hl='en-US', geo=region)
            print "waiting..."
            time.sleep(randint(5, 10))
            print "saving..."
            # download file
            connector.save_csv(path, region + '_' + str(sample_counter))
            print "done with iteration", sample_counter
            sample_counter += 1
Code Example #18
#
# Install requirements with pip
#

from pytrends.pyGTrends import pyGTrends
import time
import os
from random import randint
import pandas as pd

# Add your Gmail username to the google_username variable and your Gmail password to the google_password variable.
google_username = ""
google_password = ""
connector = pyGTrends(google_username, google_password)

# This script downloads a series of CSV files from Google Trends. Please specify a filepath for where you'd like these files to be stored in the below variable.
path = ""

# Specify the filename of a CSV with a list of keywords in the variable keywordcsv. The CSV should be one column, with the header Keywords (case sensitive).
keywordcsv = "keywords.csv"
keywords = pd.read_csv(keywordcsv)

# Download each keyword's report and calculate its slope:
keywordlist = pd.DataFrame(columns=["keyword", "slope"])
for index, row in keywords.iterrows():
    print("Downloading Keyword #" + str(index))
    payload = {'geo': 'US', 'q': [row[0]]}
    connector.request_report(payload)
    time.sleep(randint(5, 10))
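The excerpt ends before the advertised slope is actually computed. One hypothetical way to finish the loop body, assuming each report is saved and read back past the usual metadata rows (none of this is the original code, and it assumes numpy imported as np):

    # hypothetical continuation of the loop body
    connector.save_csv(path, row[0])
    trend = pd.read_csv(path + row[0] + ".csv", skiprows=4, names=["week", "interest"])
    slope = np.polyfit(range(len(trend)), trend["interest"], 1)[0]
    keywordlist.loc[index] = [row[0], slope]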
Code Example #19
File: getData.py Project: stunax/Ws
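        # (excerpt resumes midway through getData's per-word download loop)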
        #If weekly data, convert to monthly.
        wordfile = onlymonths(wordfile,monthsnum)
        if len(wordfile) > 0:
            months = wordfile
        wordfilenum = map(lambda x: x.split(",")[1],wordfile)
        result[:,i] = np.array(wordfilenum)
        time.sleep(randint(2, 5))
    words = np.array(words)[dont_skip]
    months = map(lambda x: x.split(",")[0],months)
    result = result[:,dont_skip]
    return (result,words,months)

def main(text1,text2,name,google):
    words = tokenize(text1,text2)
    save_words(name,words)
    data = getData(words,name,google)
    save_csv("../data/" + name + "dat.csv",data[0],data[1],data[2])
    print "done with " + name


if __name__ == '__main__':
    #getData([u"æøå"],"")
    print "Talking with google. This takes time!"
    google_username = "******"
    google_password = "******"
    google = pyGTrends(google_username, google_password)
    main("../data/MFR1.txt","../data/MFR2.txt","MFR",google)
    #main("../data/DiTe1.txt","../data/DiTe2.txt","DiTe",google)
    main("../data/HPV1.txt","../data/HPV2.txt","HPV",google)
    #main("../data/PCV1.txt","../data/PCV2.txt","PCV",google)
Code Example #20
File: main.py Project: Kullax/WebScience16
def run(type, online=True):
    # Lasso prediction - tolerance set to avoid a convergence warning
    model = linear_model.Lasso(tol=0.001)

    tmp = []
    google_username = "******" # We Be Anonymous
    google_password = "******"
    # Location for .csv
    path = "trends/"+type+"/"

    # Use pytrends to gather data from Google when online; otherwise use local data
    # connect to Google
    if online:
        connector = pyGTrends(google_username, google_password)
    # tokenizer will determine the trends needed
    for trend in tokenizer.run(type):
        # file names should avoid Danish special characters
        name = trend.replace('ø', 'oe').replace('æ', 'ae').replace('å', 'aa')
        if online:
            # make request
            connector.request_report(str(trend), hl='dk', geo='DK', date="01/2011 57m")
            # wait a random amount of time between requests to avoid bot detection
            time.sleep(randint(3, 6))
            # download file
            connector.save_csv(path, name)
        # Once a csv file has been retrieved, extract the monthly information
        months = regular.GetArrayFromFile(path + name + ".csv")
        if months is not None:
            tmp.append(months)
    # Convert the tmp list to a numpy array of proper dimension
    X = np.array(tmp).transpose()
    # Extract clinical data
    json_pattern = re.compile('[0-9]+\.[0-9]+')
    f = open("vactionations/"+type+"-1.json", "r")
    data = f.read()
    f.close()
    match = json_pattern.findall(data)
    # Y is now the clinical data
    Y = np.array([float(x) for x in match][0:len(X)])
    # Perform 5-fold cross-validation
    k_fold = cross_validation.KFold(len(X), 5)
    v = 0
    plt.figure(type + " 5-fold graphs")
    for k, (train, test) in enumerate(k_fold):
        model.fit(X[train], Y[train])
        plot_y = model.predict(X[test])
        plt.subplot(5, 1, k+1)
        plt.ylabel("Fold %s" % (k+1))
        plot_x = range(len(plot_y))
        plt.plot(plot_x, plot_y, color='b', label='Prediction')
        plt.plot(plot_x, Y[test], color='r', label='Clinical')
        # each fold's MSE is summed; the overall RMSE is taken below
        mse = mean_squared_error(Y[test], plot_y)
        v += mse
        print test
    # overall RMSE is determined
    print type, "RMSE", np.sqrt(v/5)
    # For fun, a full prediction is made, to compare the model after 5 folds
    # and the ground truth data
    plt.xlabel("Months")
    plt.show()
    plt.figure(type + " full prediction")
    plot_y = model.predict(X)
    plot_x = range(len(plot_y))
    plt.plot(plot_x, plot_y, color='b', label='Prediction')
    plt.plot(plot_x, Y, color='r', label='Clinical')
    plt.xlabel("Months")
    plt.legend(loc="upper right", fancybox=True)
    plt.show()
Code Example #21
MYUSERNAME = "******"

google_username = config.google["User_Name"]
google_password = config.google["Password"]

logging.basicConfig(filename="logs.log", level=logging.INFO)


def now_time():
    now = datetime.datetime.now()
    return now.strftime("[%Y/%m/%d %H:%M:%S]")


logging.info("%s Connecting to google..." % now_time())
print("%s Connecting to google..." % now_time())
MYCONNECTOR = pyGTrends(google_username, google_password)
logging.info("%s Connected to google" % now_time())
print("%s Connected to google" % now_time())
logging.info("\n")

# time to sleep if got a 420 error
BACKOFF = 2
backoff = BACKOFF

# don't respond to queries from these accounts
BLACKLIST = ["pixelsorter", "lowpolybot", "slashkarebear", "slashgif", "slashremindme"]

TIME_SPAN_OPTIONS = [str(a) + "y" for a in range(1, 12)]
TIME_SPAN_OPTIONS += [str(a) + "m" for a in range(1, 91)]
TIME_SPAN_OPTIONS += [str(a) + "d" for a in range(1, 91)]
Code Example #22
File: trends.py Project: p9anand/GetGoogleTrendsData
# connector = pyGTrends(google_username, google_password)

server = 'localhost'
database = 'GoogleTrends'
user = '******'
password = '******'
log.info('Connecting to data base....!')
connection = pymysql.connect(host=server,
                             user=user,
                             password=password,
                             db=database)
log.info('We are connected to Database')
keywords = 'Policybazaar'

log.info('Connecting to Google Trends....!')
connector = pyGTrends(google_username, google_password)
log.info('Connected to Google Trends.')


while True:
    # connect to Google
    # connector = pyGTrends(google_username, google_password)
    # make request
    log.info('we are inside while loop')
    try:

        connector.request_report(keywords, hl='en-US', cat=None, geo='IN', date='now 1-H', tz="Etc/GMT-5:30")
        log.info('Requested query got...')
        xx = connector.get_data()
        yy = str(xx)
        ll = yy.split('\n')
Code Example #23
File: example.py Project: zldoty/pytrends
from pytrends.pyGTrends import pyGTrends
import time
from random import randint

google_username = "******"
google_password = "******"
path = ""

# connect to Google
custom_useragent = {'User-Agent': 'My Pytrends Script'}
connector = pyGTrends(google_username, google_password, custom_useragent)

# make request
payload = {
    'q': ['Pizza, Italian, Spaghetti, Breadsticks, Sausage'],
    'cat': '0-71'
}
connector.request_report(payload)

# wait a random amount of time between requests to avoid bot detection
time.sleep(randint(5, 10))

# download file
connector.save_csv(path, "pizza")

# get suggestions for keywords
keyword = "milk substitute"
data = connector.get_suggestions(keyword)
print(data)
Code Example #24
    def __init__(self, search_words=None, file_name="report.csv"):
        self.save_path = "pytrends/" + file_name
        self.search_words = search_words
        self.connector = pyGTrends("*****@*****.**", "gotneedforspeed")