import requests from lxml import etree import sys sys.path.append("/ROOT/www/spider/settings") from mysql import MySQLWrapper import logging import time logging.basicConfig( level=logging.INFO, format= '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', datefmt='%d %b %Y %H:%M:%S', filename='/ROOT/logs/chengyu.log', ) mysql = MySQLWrapper('db_spider') sys.setrecursionlimit(50000) #例如这里设置为一百万 def get_info(word): select_sql = 'SELECT * FROM m_media_chengyu WHERE f_Word=%s' res = mysql.fetchOne(select_sql, word) if res: return cookies = { 'BAIDUID': '9FBB3DDF9C1043EC573390790B08EA9E:FG=1', 'BIDUPSID': '9FBB3DDF9C1043EC573390790B08EA9E', 'PSTM': '1523842582', '__cfduid': 'dc50a3536ae2dfa84617d62f0ed220c001523846572',
import requests import json import sys sys.path.append("/ROOT/www/spider/settings") from mysql import MySQLWrapper import logging import time logging.basicConfig(level=logging.INFO, format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', datefmt='%d %b %Y %H:%M:%S', filename='/ROOT/logs/fund.log', ) mysql = MySQLWrapper('db_finance_shares') headers = { 'Accept': '*/*', 'Connection': 'keep-alive', 'Accept-Encoding': 'gzip, deflate', 'Referer': 'http://fund.10jqka.com.cn/datacenter/sy/', 'Accept-Language': 'zh-CN,zh;q=0.9', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36', } def run_gpx(): urls = ['http://fund.ijijin.cn/data/Net/info/gpx_code_asc_0_0_1_9999_0_0_0_jsonp_g.html', 'http://fund.ijijin.cn/data/Net/info/zqx_code_asc_0_0_1_9999_0_0_0_jsonp_g.html', 'http://fund.ijijin.cn/data/Net/info/hhx_F009_desc_0_0_1_9999_0_0_0_jsonp_g.html', 'http://fund.ijijin.cn/data/Net/info/ETF_F009_desc_0_0_1_9999_0_0_0_jsonp_g.html', 'http://fund.ijijin.cn/data/Net/info/LOF_F009_desc_0_0_1_9999_0_0_0_jsonp_g.html',
'Referer': 'http://vip.stock.finance.sina.com.cn/q/go.php/vIR_SumRating/index.phtml', 'Accept-Language': 'zh-CN,zh;q=0.9', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36', } logging.basicConfig( level=logging.INFO, format= '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', datefmt='%d %b %Y %H:%M:%S', filename='log/sina_long.log', ) mysql = MySQLWrapper('db_Stock_Holding') def run(): select_sql = 'SELECT f_organization_code FROM t_Organization_Attention' result = mysql.fetchAll(select_sql) code_list = [] for item in result: a = str(item['f_organization_code']) if a[0] == '6': code_list.append('s_sh' + a) elif a[0] == '0' or a[0] == '3': code_list.append('s_sz' + a) if len(code_list) == 60: sina_stock_level_fast(code_list, 't_Organization_Attention') code_list = []
import sys
sys.path.append("/ROOT/www/spider/settings")
from mysql import MySQLWrapper
import logging
import time
import json
import re

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%d %b %Y %H:%M:%S',
    filename='/ROOT/logs/coincap.log',
)

mysql = MySQLWrapper('db_cryptocurrency')


def get_exchange():
    """Return the stored corrected price for the currency named u'美元', divided by 100.

    Reads ``f_corrected_price`` from ``db_forex_source.t_whpj_source`` for the
    row whose ``f_currency_name`` is u'美元'.

    Returns:
        float: the stored price divided by 100.

    Raises:
        TypeError: if no matching row is returned (``fetchOne`` gives a
            non-subscriptable result), same as before this change.
    """
    select_sql = 'SELECT f_corrected_price FROM db_forex_source.t_whpj_source WHERE f_currency_name=%s'
    result = mysql.fetchOne(select_sql, u'美元')
    # Convert to float *before* dividing: dividing first truncates integer
    # values under Python 2 and fails outright when the column comes back as
    # a string.
    return float(result['f_corrected_price']) / 100


def get_message(exchange):
    headers = {
        'Referer': 'http://coincap.io/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
    }
import re import sys sys.path.append("/ROOT/www/spider/settings") from mysql import MySQLWrapper import logging import time import os logging.basicConfig( level=logging.INFO, format= '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', datefmt='%d %b %Y %H:%M:%S', filename='/ROOT/logs/gushiwen.log', ) mysql = MySQLWrapper('db_GuShiWen') foldername = '/ROOT/www/spider_pic/author/' + time.strftime( "%Y-%m", time.localtime(time.time())) if not os.path.exists(foldername): os.makedirs(foldername) headers = { 'authority': 'www.gushiwen.org', 'upgrade-insecure-requests': '1', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'accept-encoding':
from lxml import etree import sys sys.path.append("/ROOT/www/spider/settings") from langconv import * from mysql import MySQLWrapper import logging import time logging.basicConfig( level=logging.INFO, format= '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', datefmt='%d %b %Y %H:%M:%S', filename='/ROOT/logs/stock.log', ) mysql = MySQLWrapper() cookies = { '_ga': 'GA1.2.829106724.1523845461', '_gid': 'GA1.2.661814915.1523845464', '__utma': '131925965.829106724.1523845461.1523845497.1523845497.1', '__utmc': '131925965', '__utmz': '131925965.1523845497.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)', '__utmt': '1', '__utmb': '131925965.7.10.1523845497', } headers = { 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9',
"""Fetch the weather for one randomly chosen city and cache it in t_weather."""
import json
import sys

import requests

sys.path.append("/ROOT/www/spider/settings")
from mysql import MySQLWrapper

mysql = MySQLWrapper('db_weather')

# requests needs an explicit scheme; a bare "host:port/path" is rejected
# before any connection is attempted.
WEATHER_URL = 'http://10.0.0.26:8080/server/weather'
# Seconds to wait for the weather service so a stalled server cannot hang the job.
REQUEST_TIMEOUT = 10


def run():
    """Refresh the cached weather JSON for one randomly picked city.

    Steps:
      1. Pick a city from ``t_city`` (the query randomises on ``f_UID``).
      2. Request its weather from the internal weather service.
      3. Update the city's existing ``t_weather`` row, or insert one if the
         city has no row yet.

    Returns:
        None. Returns early without doing anything when ``t_city`` yields
        no row.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
        ValueError: if the service response body is not valid JSON.
    """
    pick_city_sql = 'SELECT f_City FROM t_city WHERE f_UID >= (SELECT MAX(f_UID) FROM t_city ) * RAND() LIMIT 5'
    insert_sql = 'INSERT INTO t_weather(f_City,f_Json)VALUES(%s,%s)'
    select_sql = 'SELECT * FROM t_weather WHERE f_City=%s'
    update_sql = 'UPDATE t_weather SET f_Json=%s WHERE f_City=%s'

    row = mysql.fetchOne(pick_city_sql)
    if not row:
        # Empty city table (or the random threshold matched nothing).
        return
    city = row['f_City']

    response = requests.get(
        WEATHER_URL,
        params={'city': city},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()

    # Parse first so a non-JSON body fails loudly, then store the payload as
    # JSON text rather than handing a dict to the SQL driver.
    # NOTE(review): assumes f_Json is a text/JSON column -- confirm schema.
    payload = json.dumps(response.json())

    if mysql.fetchOne(select_sql, city):
        mysql.execute(update_sql, payload, city)
    else:
        mysql.execute(insert_sql, city, payload)


if __name__ == '__main__':
    run()
import sys sys.path.append(setting_path) from mysql import MySQLWrapper import logging import json import re logging.basicConfig( level=logging.INFO, format= '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', datefmt='%d %b %Y %H:%M:%S', filename=log_path, ) mysql = MySQLWrapper('db_finance_shares') cookies = { 'UOR': 'www.baidu.com,tech.sina.com.cn,', 'SINAGLOBAL': '124.65.127.142_1524019116.251870', 'Apache': '124.65.127.142_1524019116.251872', 'lxlrtst': '1524018574_o', 'lxlrttp': '1524018574', 'U_TRS1': '0000000e.9b596696.5ad6b174.e0ad695d', 'U_TRS2': '0000000e.9b646696.5ad6b174.18fdc148', 'ULV': '1524019575077:2:2:2:124.65.127.142_1524019116.251872:1524019119080', 'FINANCE2': '83a6c35dc42b641f24ee430c69b8dd38', 'FIN_ALL_VISITED': 'sh600078', 'rotatecount': '1', 'FINA_V_S_2': 'sh600078',
headers = { 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Referer': 'http://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/dzjy/index.phtml?p=2276', } logging.basicConfig(level=logging.INFO, format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', datefmt='%d %b %Y %H:%M:%S', filename='/ROOT/logs/sina_finance.log', ) mysql = MySQLWrapper('db_Stock_Holding') # 机构持股汇总 def sina_stock_department(page): params = ( ('p', page), ) response = requests.get('http://vip.stock.finance.sina.com.cn/q/go.php/vComStockHold/kind/jgcg/index.phtml',params=params, headers=headers, cookies=cookies) selector = etree.HTML(response.content.decode('GBK')) result = selector.xpath('//*[@id="dataTable"]/tr') selecet_sql = 'SELECT * from t_Share_Holding where f_share_code=%s' insert_sql = 'INSERT INTO t_Share_Holding(f_year,f_month,f_share_name,f_share_code,f_num,f_num_balance,f_stock_percent,f_stock_percent_balance,f_current_stock_percent,f_current_percent_balance)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)' f_year = u'2018年' f_month = u'一季报'
import requests from lxml import etree import sys sys.path.append("/ROOT/www/spider/settings") from mysql import MySQLWrapper import time import json import re mysql = MySQLWrapper('db_forex_source') def run(): headers = { 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Referer': 'http://www.boc.cn/sourcedb/whpj/index_1.html', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9', } response = requests.get('http://www.boc.cn/sourcedb/whpj/index.html', headers=headers) selector = etree.HTML(response.content.decode('utf-8')) result = selector.xpath('/html/body/div/div[3]/div[1]/div[2]/table/tr[position()>1]') select_sql = 'SELECT * FROM t_whpj_source WHERE f_currency_name=%s ' insert_sql = 'INSERT INTO t_whpj_source(f_currency_name,f_spot_buy_price,f_oof_buy_price,f_spot_sale_price,f_oof_sale_price,f_foreign_rate_price,f_corrected_price,f_start_time)VALUES(%s,%s,%s,%s,%s,%s,%s,%s)' update_sql = 'UPDATE t_whpj_source SET f_spot_buy_price=%s,f_oof_buy_price=%s,f_spot_sale_price=%s,f_oof_sale_price=%s,f_foreign_rate_price=%s,f_corrected_price=%s,f_start_time=%s where f_currency_name=%s' insert_sql_old = 'INSERT INTO t_whpj_source_copy(f_currency_name,f_spot_buy_price,f_oof_buy_price,f_spot_sale_price,f_oof_sale_price,f_foreign_rate_price,f_corrected_price,f_start_time)VALUES(%s,%s,%s,%s,%s,%s,%s,%s)' for item in result: name = item.xpath('td[1]/text()')[0] buy_price = get_text_from_xpath(item,'td[2]/text()')