# -*- coding: utf-8 -*-
# Australia: run the ASX/NSX spiders, then move the results to the NAS.
import os

from Move_2_Nas import Move2Nas
from Initialization import Initialization

os.chdir('/root/spiderItem/australia/australia_l')
Initialization().InitializeMain2()
os.system("scrapy crawl australiaExcuteFirst")
os.system("scrapy crawl DownloadPdf-ASX")
os.system("scrapy crawl DownloadPdf-NSX")
os.system("scrapy crawl BasicInfo_NSX")
try:
    Initialization().InitializeMain()
except FileNotFoundError:
    pass
Move2Nas().Move2NasMain("/data/spiderData/australia", "/homes3/Australia/")

# Hong Kong: run the HCK spiders, then move the results to the NAS.
import os

from Move_2_Nas import Move2Nas
from Initialization import Initialization

os.chdir('/root/spiderItem/HongKong/HCK_l')
Initialization().InitializeMain2()
os.system("scrapy crawl HCK_Execute1")
os.system("scrapy crawl HCK_Execute2")
os.system("scrapy crawl HCK_pdf_spider")
os.system("scrapy crawl HCK_information")
try:
    Initialization().InitializeMain()
except FileNotFoundError:
    pass
Move2Nas().Move2NasMain("/data/spiderData/HongKong", "/homes3/HongKong/")

# -*- coding: utf-8 -*-
# South Africa: run the sfAll and sfFile spiders, then move the results to the NAS.
import os

from Move_2_Nas import Move2Nas
from Initialization import Initialization

os.chdir("/root/spiderItem/SouthAfrica")
Initialization().InitializeMain2()
os.system("scrapy crawl sfAll")
os.system("scrapy crawl sfFile")
try:
    Initialization().InitializeMain()
except FileNotFoundError:
    pass
Move2Nas().Move2NasMain("/data/spiderData/SouthAfrica", "/homes3/ZAF/")

""" 1.spider bond 负责下载巨潮上深市债券列表及基本信息,由于数据量不是很大,更新策略是有则根据bond_code更新数据,无则插入新数据 2.spider add_szse_bond 负责下载深交所上债券列表及部分信息(深交所基本信息不全)并和巨潮上的比对去重后插入数据库,更新原则同上 3.spider fileProUpdate 负责下载并更新债券公告及问询函 监管措施 纪律处分。 策略是拿到各个类型下的最新时间的url 去和原网页从上 往下比对,当遇到相同的url后不再往后遍历,插入数据库并做了一层doc_source_url是否存在的判断(避免最新时间下的url有多个, 会出现下载重复的情况)。 """ import os from Move_2_Nas import Move2Nas from Initialization import Initialization os.chdir('/root/spiderItem/shenzhenBond') Initialization().InitializeMain2() os.system("scrapy crawl bond") os.system("scrapy crawl add_szse_bond") os.system("scrapy crawl fileProUpdate") try: Initialization().InitializeMain() except FileNotFoundError: pass Move2Nas().Move2NasMain("/data/spiderData/shenzhenBond", "/homes3/ChinaSecurities/")
# -*- coding: utf-8 -*-
# Morningstar GBR: clear gbr_not_parsing, run the spiders, then move the results to the NAS.
import os

import pymysql

from Initialization import Initialization
from Move_2_Nas import Move2Nas

conn = pymysql.connect(host="10.100.4.99", port=3306, db="opd_common",
                       user="******", passwd="OPDATA", charset="utf8")
cursor = conn.cursor()
sql = "delete from gbr_not_parsing"
cursor.execute(sql)
conn.commit()
cursor.close()
conn.close()
print("Database initialization complete")

os.chdir("/root/spiderItem/morningStarGbr")
Initialization().InitializeMain2()
os.system("python3 /root/spiderItem/morningStarGbr/GBR/fastSpider.py")
os.system("scrapy crawl fileDownload")
try:
    Initialization().InitializeMain()
except FileNotFoundError:
    pass
Move2Nas().Move2NasMain("/data/spiderData/morningStar_gbr", "/homes3/GBR/")

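# The cleanup above leaves the connection open if the DELETE raises. A sketch
# of the same step with a cursor context manager and try/finally, reusing the
# connection parameters from the script above; behavior is otherwise identical.
import pymysql

conn = pymysql.connect(host="10.100.4.99", port=3306, db="opd_common",
                       user="******", passwd="OPDATA", charset="utf8")
try:
    with conn.cursor() as cursor:          # cursor closes even on error
        cursor.execute("delete from gbr_not_parsing")
    conn.commit()
finally:
    conn.close()                           # connection always released
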
# India: run the BSE/NSE download spiders, then move the results to the NAS.
import os

from Move_2_Nas import Move2Nas
from Initialization import Initialization

os.chdir("/root/spiderItem/india/india_r")
Initialization().InitializeMain2()
os.system("scrapy crawl downloadPdf_BSE_A")
os.system("scrapy crawl downloadExcel_BSE")
os.system("scrapy crawl downloadPdf_BSE_Qv2")
os.system("scrapy crawl downloadExcel_NSE")
os.system("scrapy crawl downloadZip_NSE")
try:
    Initialization().InitializeMain()
except FileNotFoundError:
    pass
Move2Nas().Move2NasMain("/data/spiderData/india", "/homes3/Inida/")

# Germany: run the Frankfurt spiders, then move the results to the NAS.
import os

from Move_2_Nas import Move2Nas
from Initialization import Initialization

os.chdir("/root/spiderItem/germany/germany_l")
Initialization().InitializeMain2()
os.system("scrapy crawl FrankfurtCompanyList")
os.system("scrapy crawl germanyExcute2")
os.system("scrapy crawl Frankfurtpdf")
os.system("scrapy crawl FrankfurtBasicInfo")
try:
    Initialization().InitializeMain()
except FileNotFoundError:
    pass
Move2Nas().Move2NasMain("/data/spiderData/germany", "/homes3/Germany/")

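# All seven run scripts repeat the same sequence: chdir into the project,
# InitializeMain2(), a row of "scrapy crawl ..." calls, InitializeMain()
# (tolerating FileNotFoundError), then Move2NasMain(). A sketch of one shared
# runner under those assumptions; run_site is a hypothetical name, and
# subprocess.run stands in for os.system so a failed crawl's return code is
# visible to the caller.
import os
import subprocess

from Move_2_Nas import Move2Nas
from Initialization import Initialization

def run_site(project_dir, spiders, data_dir, nas_dir):
    os.chdir(project_dir)
    Initialization().InitializeMain2()
    for spider in spiders:
        # check=False mirrors os.system: a failing spider does not abort
        # the run, but the CompletedProcess return code can be logged.
        subprocess.run(["scrapy", "crawl", spider], check=False)
    try:
        Initialization().InitializeMain()
    except FileNotFoundError:
        pass
    Move2Nas().Move2NasMain(data_dir, nas_dir)

# For example, the Germany run above becomes:
run_site("/root/spiderItem/germany/germany_l",
         ["FrankfurtCompanyList", "germanyExcute2",
          "Frankfurtpdf", "FrankfurtBasicInfo"],
         "/data/spiderData/germany", "/homes3/Germany/")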