Example #1
0
# Python 2-only idiom: reloading `sys` makes setdefaultencoding() callable
# again, so the interpreter-wide default string encoding can be set to UTF-8.
# NOTE(review): `reload` is not a builtin on Python 3 — confirm this script is
# only ever run under Python 2.
reload(sys)
sys.setdefaultencoding('utf-8')

# prepare: remove old files in 2_dividents

# Source URL and output location for the per-security share-bonus pages.
# `{0}` in both templates is filled with the security code.
urlTemplate = (
    "http://vip.stock.finance.sina.com.cn"
    "/corp/go.php/vISSUE_ShareBonus/stockid/{0}.phtml"
)
# NOTE(review): the variable is called raw_sh but the path points at `sz`.
raw_sh = "../../../data/raw/sz/"
fileNameTemplate = "".join([raw_sh, "2_dividents/{0}.html"])

# Browser-like HTTP request headers for the Sina share-bonus pages.
# `Host` must hold only the host name (optionally with a port); the original
# value included the "http://" scheme, which is not a valid Host header.
# NOTE(review): the download call in this script passes None for the headers,
# so this dict appears to be unused here — confirm whether that is intended.
downloadHeaders = {
    'Accept':
    'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding':
    'gzip, deflate',
    'Connection':
    'close',
    'Host':
    'vip.stock.finance.sina.com.cn',
    'Referer':
    'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/000637.phtml',
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

# Fetch one page per security code and store it under the per-code file name.
# No custom headers are sent (None); sleepSeconds=5 is handed to the downloader.
codes = securitydb.get_security_code()
for code in codes:
    net_functions.download(
        urlTemplate.format(code),
        None,
        fileNameTemplate.format(code),
        sleepSeconds=5)
Example #2
0
    'Host':
    'query.sse.com.cn',
    'Referer':
    'http://www.sse.com.cn/assortment/stock/list/info/announcement/index.shtml?productId=600000',
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

# Output path template: {0} is the security code, {1} and {2} are the first
# and last year of the queried date window (see the download loop's format call).
fileName_template = raw_sh + "2_annual_url/{0}_{1}_{2}.json"
'''
example:
{"beginDate":"2017-01-01","endDate":"2018-01-01","isNew":"1","isPagination":"true","jsonCallBack":"","keyWord":"","pageHelp":{"beginPage":1,"cacheSize":1,"data":[{"INDEXCLASS":null,"PLAN_Date":null,"PLAN_Year":null,"ROWNUM":null,"ROWNUM_":null,"SSEDate":"2017-04-01","SSETime":null,"SSETimeStr":null,"URL":"\/disclosure\/listedinfo\/announcement\/c\/2017-04-01\/600000_2016_nzy.pdf","author":null,"book_Name":null,"bulletinHeading":null,"bulletinType":null,"bulletin_No":null,"bulletin_Type":"年报摘要","bulletin_Year":"2016","category_A":null,"category_B":null,"category_C":null,"category_D":null,"chapter_No":null,"companyAbbr":null,"dispatch_Organ":null,"file_Serial":null,"finish_Time":null,"initial_Date":null,"isChangeFlag":null,"journal_Issue":null,"journal_Name":null,"journal_Section":null,"journal_Year":null,"keyWord":null,"key_Word":null,"language":null,"lemma_CN":null,"lemma_EN":null,"publishing_Comp":null,"question":null,"question_Class":null,"read_Status":null,"save_Time":null,"section":null,"security_Code":"600000","source":null,"spareVolEnd":null,"title":"浦发银行2016年年度报告摘要","title_ETC":null,"title_PY":null,"unit_Code":null,"unit_Type":null},{"INDEXCLASS":null,"PLAN_Date":null,"PLAN_Year":null,"ROWNUM":null,"ROWNUM_":null,"SSEDate":"2017-04-01","SSETime":null,"SSETimeStr":null,"URL":"\/disclosure\/listedinfo\/announcement\/c\/2017-04-01\/600000_2016_n.pdf","author":null,"book_Name":null,"bulletinHeading":null,"bulletinType":null,"bulletin_No":null,"bulletin_Type":"年报","bulletin_Year":"2016","category_A":null,"category_B":null,"category_C":null,"category_D":null,"chapter_No":null,"companyAbbr":null,"dispatch_Organ":null,"file_Serial":null,"finish_Time":null,"initial_Date":null,"isChangeFlag":null,"journal_Issue":null,"journal_Name":null,"journal_Section":null,"journal_Year":null,"keyWord":null,"key_Word":null,"language":null,"lemma_CN":null,"lemma_EN":null,"publishing_Comp":null,"question":null,"question_Class":null,"read_Status":null,"save_Time":null,"section":null,"s
ecurity_Code":"600000","source":null,"spareVolEnd":null,"title":"浦发银行2016年年度报告","title_ETC":null,"title_PY":null,"unit_Code":null,"unit_Type":null}],"endDate":null,"endPage":5,"objectResult":null,"pageCount":1,"pageNo":1,"pageSize":25,"searchDate":null,"sort":null,"startDate":null,"total":2},"productId":"600000","reportType":"YEARLY","reportType2":"DQBG","result":[{"INDEXCLASS":null,"PLAN_Date":null,"PLAN_Year":null,"ROWNUM":null,"ROWNUM_":null,"SSEDate":"2017-04-01","SSETime":null,"SSETimeStr":null,"URL":"\/disclosure\/listedinfo\/announcement\/c\/2017-04-01\/600000_2016_nzy.pdf","author":null,"book_Name":null,"bulletinHeading":null,"bulletinType":null,"bulletin_No":null,"bulletin_Type":"年报摘要","bulletin_Year":"2016","category_A":null,"category_B":null,"category_C":null,"category_D":null,"chapter_No":null,"companyAbbr":null,"dispatch_Organ":null,"file_Serial":null,"finish_Time":null,"initial_Date":null,"isChangeFlag":null,"journal_Issue":null,"journal_Name":null,"journal_Section":null,"journal_Year":null,"keyWord":null,"key_Word":null,"language":null,"lemma_CN":null,"lemma_EN":null,"publishing_Comp":null,"question":null,"question_Class":null,"read_Status":null,"save_Time":null,"section":null,"security_Code":"600000","source":null,"spareVolEnd":null,"title":"浦发银行2016年年度报告摘要","title_ETC":null,"title_PY":null,"unit_Code":null,"unit_Type":null},{"INDEXCLASS":null,"PLAN_Date":null,"PLAN_Year":null,"ROWNUM":null,"ROWNUM_":null,"SSEDate":"2017-04-01","SSETime":null,"SSETimeStr":null,"URL":"\/disclosure\/listedinfo\/announcement\/c\/2017-04-01\/600000_2016_n.pdf","author":null,"book_Name":null,"bulletinHeading":null,"bulletinType":null,"bulletin_No":null,"bulletin_Type":"年报","bulletin_Year":"2016","category_A":null,"category_B":null,"category_C":null,"category_D":null,"chapter_No":null,"companyAbbr":null,"dispatch_Organ":null,"file_Serial":null,"finish_Time":null,"initial_Date":null,"isChangeFlag":null,"journal_Issue":null,"journal_Name":null,"journal_Section":null,"journal
_Year":null,"keyWord":null,"key_Word":null,"language":null,"lemma_CN":null,"lemma_EN":null,"publishing_Comp":null,"question":null,"question_Class":null,"read_Status":null,"save_Time":null,"section":null,"security_Code":"600000","source":null,"spareVolEnd":null,"title":"浦发银行2016年年度报告","title_ETC":null,"title_PY":null,"unit_Code":null,"unit_Type":null}]}
'''

# For every security, query three-year date windows starting at its listing
# year and save one response file per window.
infos = securitydb.get_security_info()
current_year = datetime.date.today().year

for info in infos:
    code = info['code']
    listing_year = info['listingdate'].year

    # The first window is always fetched, even if listing_year lies after
    # current_year; further windows start every 3 years while the start year
    # does not exceed current_year.
    last_start = max(listing_year, current_year)
    for startYear in range(listing_year, last_start + 1, 3):
        endYear = startYear + 2
        url = url_template.format(code,
                                  "{0}-01-01".format(startYear),
                                  "{0}-12-31".format(endYear))
        fileName = fileName_template.format(code, startYear, endYear)
        download(url, download_headers, fileName)
Example #3
0
    'Host':
    'query.sse.com.cn',
    'Referer':
    'http://www.sse.com.cn/assortment/stock/list/info/turnover/index.shtml?COMPANY_CODE=601668',
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

# Build the query parameters for the last day of December of each of the two
# years preceding the current one.
year = local_functions.get_current_year()
year1, year2 = str(year - 1), str(year - 2)
fileName_template = raw_sh + "5_price/{0}_{1}.json"
search_info_arr = [
    {
        'inYear': y,
        'inMonth': y + '12',
        'searchDate': y + '-12-31'
    }
    for y in (year1, year2)
]
# For every code returned by get_valuable_security_code(), download one file
# per prepared search entry (the trailing 5 is passed through to download()).
codes = securitydb.get_valuable_security_code()

for code in codes:
    for search_info in search_info_arr:
        in_year = search_info['inYear']
        net_functions.download(
            url_template.format(code, search_info['inMonth'], in_year,
                                search_info['searchDate']),
            download_headers,
            fileName_template.format(code, in_year),
            5)
Example #4
0
# Browser-like HTTP request headers for money.finance.sina.com.cn; the Referer
# is a fixed sample report page on the same host.
download_headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'close',
    'Host': 'money.finance.sina.com.cn',
    'Referer': 'http://money.finance.sina.com.cn/corp/go.php/vFD_ProfitStatement/stockid/600006/ctrl/2013/displaytype/4.phtml',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}

# For every code returned by get_valuable_security_code(): download the report
# page for the current year, read the links to the other years' reports from
# that page, then download each of those as well.
year = local_functions.get_current_year()
codes = securitydb.get_valuable_security_code()
for code in codes:
    # download the first report (always for the current year)
    url = url_template.format(code, year)
    fileName = fileName_template.format(code, year)
    net_functions.download(url, None, fileName)
    # get other report urls
    page = local_functions.read_html(fileName)
    year_urls = local_functions.get_report_link_year_and_urls(page)
    # download other reports
    # The loop targets are deliberately distinct from `year`: rebinding `year`
    # here would leak into the next outer iteration and make every following
    # security request its "first report" for the wrong year.
    for report_year, report_url in year_urls:
        report_file = fileName_template.format(code, report_year)
        net_functions.download(report_url, download_headers, report_file, 1)
Example #5
0
# Python 2-only idiom: reloading `sys` makes setdefaultencoding() callable
# again, so the interpreter-wide default string encoding can be set to UTF-8.
# NOTE(review): `reload` is not a builtin on Python 3 — confirm this script is
# only ever run under Python 2.
reload(sys)
sys.setdefaultencoding('utf-8')

# Stock-list download endpoints on query.sse.com.cn; the two URLs differ only
# in the stockType query parameter (1 for url_shA, 2 for url_shB).
_stock_list_url = "http://query.sse.com.cn/security/stock/downloadStockListFile.do?csrcCode=&stockCode=&areaName=&stockType={0}"
url_shA = _stock_list_url.format(1)
url_shB = _stock_list_url.format(2)
# Directory prefix under which the downloaded files are stored.
raw_sh = "../../../data/raw/sh/"

# Browser-like HTTP request headers for query.sse.com.cn, with the share-list
# page on www.sse.com.cn as Referer.
downloadHeaders = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Host': 'query.sse.com.cn',
    'Referer': 'http://www.sse.com.cn/assortment/stock/list/share',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}

# Pair each stock-list URL with its destination .xls file, then download the
# pairs in order using the shared request headers.
url_file_arr = [
    {'url': source, 'file': raw_sh + "1_security/" + target}
    for source, target in ((url_shA, "shA.xls"), (url_shB, "shB.xls"))
]

for url_file in url_file_arr:
    net_functions.download(url_file['url'], downloadHeaders, url_file['file'])
import local_functions
import securitydb

import sys
# Python 2-only idiom: reloading `sys` makes setdefaultencoding() callable
# again, so the interpreter-wide default string encoding can be set to UTF-8.
# NOTE(review): `reload` is not a builtin on Python 3 — confirm this script is
# only ever run under Python 2.
reload(sys)
sys.setdefaultencoding('utf-8')

# Directory prefix for downloaded files, and the query URL whose `{0}`
# placeholder is filled with the company code.
raw_sh = "../../../data/raw/sh/"
urlTemplate = (
    "http://query.sse.com.cn/security/stock/queryCompanyStockStruct.do"
    "?jsonCallBack=&isPagination=false&companyCode={0}"
)

# Browser-like HTTP request headers for query.sse.com.cn, with a capital
# structure page on www.sse.com.cn as Referer.
downloadHeaders = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Host': 'query.sse.com.cn',
    'Referer': 'http://www.sse.com.cn/assortment/stock/list/info/capital/index.shtml?COMPANY_CODE=603993',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}

# Download one JSON file per code returned by get_valuable_security_code();
# `{0}` in the file name is the code, and the trailing 10 is passed through
# to download().
fileNameTemplate = raw_sh + "6_stock_struct/{0}.json"
codes = securitydb.get_valuable_security_code()

for code in codes:
    net_functions.download(
        urlTemplate.format(code),
        downloadHeaders,
        fileNameTemplate.format(code),
        10)