Exemple #1
0
def getIndex(keywords, endDate):
    """Save the combined ('all') Baidu index rows for *keywords* as a CSV file.

    Queries ``BaiduIndex`` from the fixed start date 2015-01-01 up to
    *endDate*, keeps only the records whose ``type`` is ``'all'``, converts
    each of them with ``dict_to_df`` and writes the stacked result to
    ``SearchIndex.csv`` (encoded as UTF-8 with BOM).

    :param keywords: keywords, passed straight through to ``BaiduIndex``
    :param endDate: end date, passed straight through to ``BaiduIndex``
    :return: None; the result is only written to disk
    """
    baidu_index = BaiduIndex(keywords, '2015-01-01', endDate)
    frames = []
    for index in baidu_index.get_index():
        # Filter on the raw record rather than on the converted frame:
        # comparing a frame column yields a Series, and pandas refuses to
        # evaluate a Series in a boolean context.
        # NOTE(review): assumes get_index() yields dict-like records with a
        # 'type' key - confirm against BaiduIndex.
        if index['type'] == 'all':
            frames.append(dict_to_df(index))
    # A single concat replaces the per-row DataFrame.append, which was
    # removed in pandas 2.0 and re-copied the accumulated frame every time.
    df = pd.concat(frames) if frames else pd.DataFrame()
    df.to_csv(
        'D:/coin/spider-BaiduIndex-master/spider-BaiduIndex-master/SearchIndex.csv',
        encoding='utf_8_sig')
Exemple #2
0
def get_index():
    """Crawl Baidu index averages for a batch of pending words and store them.

    Selects up to 5 unprocessed ``t_tail_word`` rows containing "php", marks
    such rows as processed, queries ``BaiduIndex`` for the selected words and
    inserts one ``baidu_index`` row per word: status '0' with the all/pc/wise
    averages when index data exists, status '1' otherwise. The elapsed time
    is printed at the end.

    :return: None
    """
    # Query window, used both for the crawl and for the stored rows.
    start_date, end_date = '2018-01-01', '2019-04-01'
    start_time = time.time()
    db = mysql_operation.getcon()
    try:
        # NOTE(review): the SELECT and the UPDATE share a WHERE ... LIMIT 5
        # without an ORDER BY, so nothing guarantees they hit the same rows.
        select_sql = "select `tail_word` from `t_tail_word` where extr_flag = '0' and tail_word like '%php%' limit 5"
        update_sql = "update `t_tail_word` set extr_flag = '1' where extr_flag = '0' and tail_word like '%php%' limit 5"
        results = mysql_operation.baseselect(db, select_sql)
        mysql_operation.baseoperation(db, update_sql)
        if results:  # nothing to crawl when the query returned no rows
            search_list = [row[0] for row in results]
            baidu_index = BaiduIndex(search_list, start_date, end_date)
            for item in search_list:
                item_result = baidu_index.result[item]
                print(item, item_result['all'])
                # NOTE(review): the values are interpolated into the SQL text
                # unescaped; a word containing a quote breaks the statement.
                # Switch to parameterized queries if mysql_operation allows.
                if len(item_result['all']) > 0:  # index data was returned
                    all_avg = index_avg(item_result, 'all')
                    pc_avg = index_avg(item_result, 'pc')
                    wise_avg = index_avg(item_result, 'wise')
                    print(all_avg, pc_avg, wise_avg)
                    insert_sql = "INSERT INTO `baidu_index` " \
                                 "(`word`, `start_time`, `end_time`, `status`, `all_avg`, `pc_avg`, `wise_avg`) " \
                                 "VALUES ('%s', '%s', '%s', '0', '%s','%s','%s')" \
                                 % (item, start_date, end_date, all_avg, pc_avg, wise_avg)
                else:  # the word is not indexed
                    insert_sql = "insert into `baidu_index` (`word`, `start_time`, `end_time`, `status`)" \
                                 "values ('%s', '%s', '%s', '1')" % (item, start_date, end_date)
                mysql_operation.baseoperation(db, insert_sql)
    finally:
        # Release the connection even when the crawl or a statement fails.
        mysql_operation.closecon(db)
    print("times  :  ", time.time() - start_time)
Exemple #3
0
def main(keywords_list, start_date, end_date):
    """
    Crawler entry point: fetch the index of every keyword and persist it.

    :param keywords_list: keywords to crawl -->list
    :param start_date: start date -->str
    :param end_date: end date -->str
    :return: nothing; each data point is handed to save_to_sql
    """
    for keyword in keywords_list:
        crawler = BaiduIndex(keyword, start_date, end_date)
        # Each entry of the 'all' series carries an index value and its date.
        for point in crawler.result[keyword]["all"]:
            save_to_sql((keyword, point["index"], point["date"]))
Exemple #4
0
# -*- coding: utf-8 -*-
"""
Created on 2019/2/15 16:37
@Author: Johnson
@Email:[email protected]
@File: demo.py
"""
from get_index import BaiduIndex

if __name__ == "__main__":
    # Demo of the BaiduIndex API. The note below reads:
    # "at most 5 keywords per request".
    """
    最多一次请求5个关键词
    """
    # Show the city and province code tables
    print(BaiduIndex.city_code)
    print(BaiduIndex.province_code)

    # NOTE(review): the trailing 901 is presumably an area code taken from
    # the tables printed above - confirm against BaiduIndex.
    baidu_index = BaiduIndex(['张艺兴', 'lol', '极限挑战', '吃鸡'], '2018-12-25', '2019-02-14',901)
    # Calling the instance with a keyword and a type iterates its records
    for data in baidu_index('lol', 'all'):
        print(data)

    # All data of every requested keyword
    print(baidu_index.result)
    # All data of one keyword
    print(baidu_index.result['极限挑战'])
    # Mobile ('wise') data of one keyword
    print(baidu_index.result['极限挑战']['wise'])
    # PC data of one keyword
    print(baidu_index.result['极限挑战']['pc'])
Exemple #5
0
from get_index import BaiduIndex

if __name__ == "__main__":
    # Crawl 2018-01-01..2019-05-02 for a fixed keyword set and print every
    # record the crawler yields.
    baidu_index = BaiduIndex(
        ['爬虫', 'lol', '张艺兴', '人工智能', '华为', '武林外传'],
        '2018-01-01',
        '2019-05-02',
    )
    for record in baidu_index.get_index():
        print(record)
Exemple #6
0
import pandas as pd
from key_word import *
import datetime
import time
from get_index import BaiduIndex

if __name__ == "__main__":
    # Time the whole run, starting before any work is done.
    start = time.time()
    today = datetime.datetime.now().strftime('%Y-%m-%d')

    # Read the keywords from stockname.csv and keep the first 500 at most.
    keywords = getkeywords('stockname.csv')[:500]

    crawler = BaiduIndex(keywords)
    # The argument is the number of threads to use.
    crawler.run(10)
    # Dump the collected result to a CSV file named after today's date.
    crawler.get_index().to_csv(today + '-index.csv')

    print(time.time() - start)
Exemple #7
0
from get_index import BaiduIndex
if __name__ == "__main__":
    # Default mode. The note below reads: "at most 5 keywords per request;
    # crawl all data for prefecture-level cities nationwide".
    """
        最多一次请求5个关键词
        抓取全国地级市全部数据
    """
    baidu_index = BaiduIndex([ '故宫','天坛','长城','天安门','颐和园'], '2017-09-30', '2018-10-01')

    # Disabled alternative. The note below reads: "crawl the keywords for the
    # prefecture-level cities of the given province codes".
    """
        根据指定省份编码抓取省份相关地级市关键词
    """
#    province = ["931", "933", "934"]
#    baidu_index = BaiduIndex(['故宫', '天坛'], '2017-09-30', '2018-10-01', 'province', province)

    # Disabled alternative. The note below reads: "crawl the keywords for the
    # given city codes".
    """
        根据指定市区编码抓取关键词
    """
#    city = ["678", "691"]
#    baidu_index = BaiduIndex(['故宫', '天坛'], '2017-09-30', '2018-10-01', 'city', city)
Exemple #8
0

if __name__ == "__main__":
    # Batch export: for each keyword batch, build a date x keyword table of
    # index values and write it to ./index/stkindex_<i>.csv.
    'if i want to write ith file, i = i-1, start = i*10'
    # Offset handed to get_keywords; advanced by 10 after every batch.
    start = 1870
    keywords = get_keywords(start)

    # File counter. It is incremented before the file name is built, so the
    # first file written is stkindex_188.csv.
    i = 187
    # Stop when get_keywords returns nothing or the counter reaches 200.
    while (keywords and i < 200):
        print(i)
        i = i + 1
        start_time = '2011-01-01'
        end_time = '2020-04-01'

        'create a res_dataframe'
        # One row per calendar day in the window, one column per keyword.
        date_index = pd.date_range(start_time, end_time)
        resDf = pd.DataFrame(index=date_index, columns=keywords)

        baidu_index = BaiduIndex(keywords, start_time, end_time)

        for index in baidu_index.get_index():
            'print(index)'
            # Store the value under the first keyword column that contains
            # the record's keyword.
            # NOTE(review): `in` is a containment test, not equality -
            # confirm that this is the intended match.
            for item in keywords:
                if (index['keyword'] in item):
                    resDf.loc[index['date'], item] = index['index']
                    break

        resDf.to_csv('./index/stkindex_' + str(i) + '.csv')
        # Move on to the next batch of keywords.
        start = start + 10
        keywords = get_keywords(start)
Exemple #9
0
from get_index import BaiduIndex

if __name__ == "__main__":
    # Demo of the BaiduIndex API. The note below reads:
    # "at most 5 keywords per request".
    """
    最多一次请求5个关键词
    """
    # Show the city and province code tables (disabled)
    # print(BaiduIndex.city_code)
    # print(BaiduIndex.province_code)

    baidu_index = BaiduIndex(['张艺兴', 'lol', '极限挑战', 'python 免费空间'],
                             '2019-04-01', '2019-04-02')
    # for data in baidu_index('lol', 'all'):
    #     print(data)

    # All data of every requested keyword (disabled)
    # print(baidu_index.result)
    # All data of one keyword
    print(baidu_index.result['python 免费空间'])
    # Combined ('all') data of one keyword
    print(baidu_index.result['极限挑战']['all'])
    # PC data of one keyword
    print(baidu_index.result['极限挑战']['pc'])
Exemple #10
0
from get_index import BaiduIndex
import requests
from config import COOKIES, PROVINCE_CODE, CITY_CODE

if __name__ == "__main__":
    # Test (disabled): request a page with the BDUSS cookie
    #url = "http://i.baidu.com"
    #wcookie = {"BDUSS":COOKIES}
    #HTML = requests.get(url,cookies=wcookie).content
    #print(HTML)

    # Real run: crawl one keyword and print every record that comes back
    baidu_index = BaiduIndex(['猪肉'], '2019-08-01', '2020-01-21')
    for record in baidu_index.get_index():
        print(record)
Exemple #11
0
from get_index import BaiduIndex

if __name__ == "__main__":
    # Demo of the BaiduIndex API. The note below reads:
    # "at most 5 keywords per request".
    """
    最多一次请求5个关键词
    """
    baidu_index = BaiduIndex(['张艺兴', 'lol', '极限挑战', '吃鸡'], '2016-10-01',
                             '2018-10-02')
    # Calling the instance with a keyword and a type iterates its records
    for data in baidu_index('lol', 'all'):
        print(data)

    # All data of every requested keyword
    print(baidu_index.result)
    # All data of one keyword
    print(baidu_index.result['极限挑战'])
    # Mobile ('wise') data of one keyword
    print(baidu_index.result['极限挑战']['wise'])
    # PC data of one keyword
    print(baidu_index.result['极限挑战']['pc'])
Exemple #12
0
    # baidu_index = BaiduIndex("无限极", '2018-12-13', '2019-02-15', 0)
    #
    # 无限极 = pd.DataFrame()
    #
    # for data in baidu_index('无限极', 'all'):
    #     temp = pd.DataFrame(pd.Series(data)).T
    #     无限极 = pd.concat([无限极, temp])
    #
    # # 无限极.index = range(len(无限极.shape))
    # print(无限极)

    # Build one table with the combined ('all') index of every keyword, each
    # row tagged with the keyword it belongs to, then print and export it.
    starttime = '2018-12-13'
    endtime = '2019-02-20'
    frames = []
    for i in ["无限极", "直销", "保健品", "权健", "华林酸碱平"]:
        baidu_index = BaiduIndex(i, starttime, endtime, 0)
        # One single-row frame per record, stacked with a single concat:
        # concatenating inside the loop re-copies all earlier rows each time.
        rows = [pd.DataFrame(pd.Series(data)).T
                for data in baidu_index(i, 'all')]
        # pd.concat rejects an empty list, so fall back to an empty frame.
        Temp = pd.concat(rows) if rows else pd.DataFrame()
        Temp['keyword'] = [i] * (Temp.shape[0])
        frames.append(Temp)
    Df = pd.concat(frames)
    print(Df)
    Df.to_csv("e:/Df.csv", encoding="gbk")

    # # 获取全部5个关键词的全部数据
    # print(baidu_index.result)
    # # 获取1个关键词的全部数据
    # print(baidu_index.result['无限极'])
    # # 获取1个关键词的移动端数据
    # print(baidu_index.result['无限极']['wise'])
Exemple #13
0
#%%
from get_index import BaiduIndex
import pandas as pd

#if __name__ == "__main__":
keywords = ['比特币']
baidu_index = BaiduIndex(keywords, '2013-04-01', '2014-04-30')

# Column-oriented accumulator: one list per record field.
baidu_index_dict = {'keyword': [], 'type': [], 'date': [], 'index': []}
for index in baidu_index.get_index():
    #print(index)
    # Only the combined ('all') records are kept.
    if index['type'] != 'all':
        continue
    for field, column in baidu_index_dict.items():
        column.append(index[field])

baidu_index_df = pd.DataFrame(baidu_index_dict)
Exemple #14
0
# -*- coding: utf8 -*-

import codecs
from datetime import datetime
from get_index import BaiduIndex

if __name__ == "__main__":
    """
    最多一次请求5个关键词
    """
    # 查看城市和省份的对应代码
    # print BaiduIndex.city_code
    # print BaiduIndex.province_code

    baidu_index = BaiduIndex(['人民币贬值'], '2009-01-01', '2019-03-31')
    for data in baidu_index('人民币贬值', 'all'):
        if datetime.strptime(data["date"], "%Y-%m-%d").strftime("%w") != "0" and datetime.strptime(data["date"], "%Y-%m-%d").strftime("%w") != "6":
            print data["date"], ",", data["index"]
            with open("test.csv", 'ab') as f:
                f.write(codecs.BOM_UTF8)
                f.write('"{0}","{1}"\r'.format(data["date"], data["index"]))

        
    print '获取1个关键词的全部数据'
    baidu_index.result['人民币贬值']
    # print '获取1个关键词的移动端数据'
    # print(baidu_index.result['人民币贬值']['wise'])
    # print '获取1个关键词的pc端数据'
    # print(baidu_index.result['人民币贬值']['pc'])
Exemple #15
0
@Author: Johnson
@Email:[email protected]
@File: demo.py
"""
from get_index import BaiduIndex
import pandas as pd

if __name__ == "__main__":
    # Collect the combined ('all') index of one keyword into a DataFrame and
    # print it. The note below reads: "at most 5 keywords per request".
    """
    最多一次请求5个关键词
    """
    # Show the city and province code tables (disabled)
    # print(BaiduIndex.city_code)
    # print(BaiduIndex.province_code)

    baidu_index = BaiduIndex(["无限极", "直销", "保健品", "权健", "华林酸碱平"], '2018-12-13',
                             '2019-02-20', 0)

    # One single-row frame per record, stacked with a single concat:
    # concatenating inside the loop re-copies all earlier rows each time.
    rows = [pd.DataFrame(pd.Series(data)).T
            for data in baidu_index('无限极', 'all')]
    # pd.concat rejects an empty list, so fall back to an empty frame.
    无限极 = pd.concat(rows) if rows else pd.DataFrame()

    # 无限极.index = range(len(无限极.shape))
    print(无限极)

    # # All data of every requested keyword
    # print(baidu_index.result)
    # # All data of one keyword
    # print(baidu_index.result['无限极'])
    # # Mobile data of one keyword
Exemple #16
0
from get_index import BaiduIndex

if __name__ == "__main__":
    # Demo of the BaiduIndex API. The note below reads:
    # "at most 5 keywords per request".
    """
    最多一次请求5个关键词
    """
    # Show the city and province code tables
    print(BaiduIndex.city_code)
    print(BaiduIndex.province_code)

    baidu_index = BaiduIndex(['找工作', '失业', '裁员'], '2011-01-01', '2016-10-01')
    # Calling the instance with a keyword and a type iterates its records
    for data in baidu_index('找工作', 'all'):
        print(data)

    # All data of every requested keyword (disabled)
    ## print(baidu_index.result)
    # All data of one keyword
    print(baidu_index.result['找工作'])
    # Mobile ('wise') data of one keyword (disabled)
    ## print(baidu_index.result['找工作']['wise'])
    # PC data of one keyword (disabled)
    ## print(baidu_index.result['找工作']['pc'])