months.append(emonth) return months months = get_emonths()[:11] for m in months: url = 'https://ustr.gov/about-us/policy-offices/press-office/press-releases/2018/%s' % m headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/62.0' } res = requests.get(url, headers=headers) data = res.text doc = pq(data) infolist = doc('.listing li').items() for i in infolist: try: link = 'https://ustr.gov/' + i('a').attr('href') title = i('a').text() time = i.text() ztitle = translate_words(title) if '中国' in ztitle: content = [link, ztitle, title, time] save_to_csv('ustr涉中公告', content) else: pass except: pass # # print(link,ztitle,time)
for t in tableinfo: res = t.text() li.append(res) return li # url = 'https://www.p2p001.com/Netloan/shownews/id/22845.html' # res = parse_page(url) # print(res) # for i in range(0, 112): try: links = get_links(i) for l in links: res = parse_page(l) save_to_csv('wangdai_data', res) except: print(i) #def parse_page(url): # table = pd.read_html(url)[0] # date = table.iloc[1, :].values[0] # df = table.iloc[3:, :] # df.columns = table.iloc[2, :].values # dates = date.strip('统计日期:').strip(')').split('(') # df.index = df.ix[:, 0] # df = df[['利率指数', '期限指数', '人气指数', '发展指数']] # d = {} # d['日期'] = dates[0] # d['星期'] = dates[1] # dicts = df.to_dict()
import requests  # used below but missing from the original import block
from pyquery import PyQuery as pq
import pandas as pd
from Office.maketime import nowtime
from Office.save_file import save_to_csv  # used below but missing from the original import block

# Download the full price history for each Solactive index and append every
# observation (formatted date first, then the raw JSON fields) to index.csv.
typelist = ['DE000SLA2514', 'DE000SLA0FS4', 'DE000SLA0RL4']
for t in typelist:
    url = 'https://www.solactive.com/indices/?indexhistory=%s&indexhistorytype=max' % t
    headers = {
        'Host': 'www.solactive.com',
        'Connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Cookie': 'cookieconsent_status=allow; _ga=GA1.2.764963108.1551520336; _gid=GA1.2.1014728705.1551520336; _gat=1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
        # BUG FIX: the original value read 'https//www.solactive.com/...'
        # (missing colon), so the Referer sent was malformed.
        'Referer': 'https://www.solactive.com/indices/?index=DE000SLA4304'
    }
    html = requests.get(url, headers=headers)
    data = html.json()
    for d in data:
        # 'timestamp' is divided by 1000 before nowtime() — presumably
        # milliseconds converted to seconds; confirm against Office.maketime.
        date = nowtime(d['timestamp'] / 1000)
        li = [date]
        for k, v in d.items():
            li.append(v)
        save_to_csv('index', li)
return res def qiudao(expression): x = symbols('x') res = diff(expression, x) return res # # print(qiudao('e**x')) def get_c(cost,debt): x = symbols('x') c = solve(((x - x ** 2 / 18 + (((1 - cost) * x ** 2) / 36))) / 0.05 - debt, x) return c vu = 135 for cost in np.arange(0.1,1.1,0.1): for debt in range(10,200,10): c = get_c(cost=cost,debt=debt) li = c +[cost,debt] save_to_csv('gscw',li) # cost = 0.5 # debt = 10 # c = get_c(cost,debt) # li = # for v in c: # if v >0: # print(v)
import requests
import re
from pyquery import PyQuery as pq
import pandas as pd
from Office.save_file import save_to_csv


def get_colleges():
    """Fetch the MOE listing page and map each college name to its website URL."""
    url = 'http://www.moe.gov.cn/jyb_zzjg/moe_347/201708/t20170828_312562.html'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/62.0'
    }
    response = requests.get(url, headers=headers)
    # The page does not ship a usable charset header; trust the detected one.
    response.encoding = response.apparent_encoding
    page = pq(response.text)
    # One <li> per college inside the editor table; the anchor carries the link.
    return {
        entry.text(): entry('a').attr('href')
        for entry in page('.TRS_Editor table li').items()
    }


res = get_colleges()
for k, v in res.items():
    save_to_csv('colleges', [k, v])
) as f: variblelist = [str(x) for x in variblelist] content = '\n'.join(variblelist) + '\n' + '\n' f.write(content) for d in df[12:]: print(d) url = 'https://news.baidu.com/' driver = webdriver.Chrome( executable_path= r'C:\Users\xfs9619\AppData\Local\Google\Chrome\Application\chromedriver.exe' ) driver.get(url) driver.find_element_by_xpath('//*[@id="ww"]').send_keys(d) driver.find_element_by_xpath('//*[@id="s_btn_wr"]').click() data = driver.page_source doc = pq(data) li = doc('.result').items() for l in li: #print(l) title = l('h3 a').text() link = l('h3 a').attr('href') info = l('p').text().split(' ') source = info[0] date = info[1] content = get_fullcontent(link) li = [title, link, source, date, content] save_to_csv('%s' % d, li) save_to_text('%s.txt' % d, li)