Example #1
0
        months.append(emonth)
    return months


# Scrape USTR's 2018 monthly press-release listing pages and save any
# announcement whose translated title mentions China to a CSV file.
# Headers are loop-invariant, so build them once.
HEADERS = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/62.0'
}

months = get_emonths()[:11]
for m in months:
    url = 'https://ustr.gov/about-us/policy-offices/press-office/press-releases/2018/%s' % m
    # timeout keeps the crawl from hanging forever on a stalled connection
    res = requests.get(url, headers=HEADERS, timeout=30)
    doc = pq(res.text)
    for item in doc('.listing li').items():
        try:
            href = item('a').attr('href')
            if not href:
                # entry has no link; skip instead of concatenating None
                continue
            link = 'https://ustr.gov/' + href
            title = item('a').text()
            time = item.text()
            ztitle = translate_words(title)
            # keep only announcements whose translated title mentions China
            if '中国' in ztitle:
                save_to_csv('ustr涉中公告', [link, ztitle, title, time])
        except Exception as e:
            # best-effort per item: report the failure instead of silently
            # swallowing it (the original bare `except: pass` hid all bugs)
            print('skip item:', e)
#
#     print(link,ztitle,time)
Example #2
0
        for t in tableinfo:
            res = t.text()
            li.append(res)
    return li


# url = 'https://www.p2p001.com/Netloan/shownews/id/22845.html'
# res = parse_page(url)
# print(res)
#
# Crawl listing pages 0..111: collect article links per page, parse each
# article, and append the parsed row to the CSV.
for page in range(0, 112):
    try:
        for link in get_links(page):
            save_to_csv('wangdai_data', parse_page(link))
    except Exception as e:
        # a failed page should not stop the whole crawl; the original bare
        # `except:` printed only the page number — include the reason too
        print(page, e)

#def parse_page(url):
# table = pd.read_html(url)[0]
# date = table.iloc[1, :].values[0]
# df = table.iloc[3:, :]
# df.columns = table.iloc[2, :].values
# dates = date.strip('统计日期:').strip(')').split('(')
# df.index = df.ix[:, 0]
# df = df[['利率指数', '期限指数', '人气指数', '发展指数']]
# d = {}
# d['日期'] = dates[0]
# d['星期'] = dates[1]
# dicts = df.to_dict()
Example #3
0
from pyquery import PyQuery as pq
import pandas as pd
from Office.maketime import nowtime
# Solactive index ISINs whose full price history we want to download.
typelist = ['DE000SLA2514', 'DE000SLA0FS4', 'DE000SLA0RL4']

# Headers do not depend on the index code — build them once outside the loop.
headers = {
    'Host': 'www.solactive.com',
    'Connection': 'keep-alive',
    'X-Requested-With': 'XMLHttpRequest',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate, br',
    'Cookie':
    'cookieconsent_status=allow; _ga=GA1.2.764963108.1551520336; _gid=GA1.2.1014728705.1551520336; _gat=1',
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    # fixed: original value was 'https//...' (missing colon), an invalid URL
    'Referer': 'https://www.solactive.com/indices/?index=DE000SLA4304'
}
for t in typelist:
    url = 'https://www.solactive.com/indices/?indexhistory=%s&indexhistorytype=max' % t
    # timeout keeps the script from hanging on a stalled connection
    html = requests.get(url, headers=headers, timeout=30)
    for d in html.json():
        # timestamps are epoch milliseconds; nowtime expects seconds
        date = nowtime(d['timestamp'] / 1000)
        # one CSV row per data point: formatted date followed by every
        # field of the JSON record (same order as the original dict loop)
        save_to_csv('index', [date] + list(d.values()))

# html = requests.post(url, headers=headers, data=data)
# doc = pq(html.text)
Example #4
0
    return res

def qiudao(expression):
    """Differentiate *expression* with respect to the symbol x and return it."""
    return diff(expression, symbols('x'))
#
# print(qiudao('e**x'))


def get_c(cost, debt):
    """Solve the valuation equation for x given a cost ratio and a debt level.

    Returns the list of roots produced by sympy's solve().
    """
    x = symbols('x')
    equation = (x - x ** 2 / 18 + ((1 - cost) * x ** 2) / 36) / 0.05 - debt
    return solve(equation, x)

vu = 135

# Sweep cost ratios 0.1..1.0 and debt levels 10..190, writing one CSV row
# per combination: the equation's roots followed by the inputs themselves.
for cost in np.arange(0.1, 1.1, 0.1):
    for debt in range(10, 200, 10):
        row = get_c(cost=cost, debt=debt) + [cost, debt]
        save_to_csv('gscw', row)

# cost = 0.5
# debt = 10
# c = get_c(cost,debt)
# li =

# for v in c:
#     if v >0:
#         print(v)
Example #5
0
import requests
import re
from pyquery import PyQuery as pq
import pandas as pd
from Office.save_file import save_to_csv


def get_colleges():
    """Scrape the MOE college directory page and return {name: website}.

    Entries without an <a> tag map to None (pyquery's attr() returns None).
    NOTE(review): colleges sharing the same display name overwrite each
    other in the dict — confirm the page has unique names.
    """
    url = 'http://www.moe.gov.cn/jyb_zzjg/moe_347/201708/t20170828_312562.html'
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/62.0'
    }
    # timeout prevents the script from hanging forever on a dead connection
    res = requests.get(url, headers=headers, timeout=30)
    # the page's declared encoding is unreliable; trust the detected one
    res.encoding = res.apparent_encoding
    doc = pq(res.text)
    items = doc('.TRS_Editor table li').items()
    return {d.text(): d('a').attr('href') for d in items}


# Persist the scraped name → website mapping, one CSV row per college.
for name, website in get_colleges().items():
    save_to_csv('colleges', [name, website])
Example #6
0
    ) as f:
        variblelist = [str(x) for x in variblelist]
        content = '\n'.join(variblelist) + '\n' + '\n'
        f.write(content)


# For each keyword in df (from row 12 on), search Baidu News with a fresh
# Chrome instance and save every result row to both a CSV and a text file.
CHROMEDRIVER = r'C:\Users\xfs9619\AppData\Local\Google\Chrome\Application\chromedriver.exe'

for d in df[12:]:
    print(d)
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER)
    try:
        driver.get('https://news.baidu.com/')
        driver.find_element_by_xpath('//*[@id="ww"]').send_keys(d)
        driver.find_element_by_xpath('//*[@id="s_btn_wr"]').click()
        doc = pq(driver.page_source)
        for result in doc('.result').items():
            title = result('h3 a').text()
            link = result('h3 a').attr('href')
            info = result('p').text().split(' ')
            source = info[0]
            # guard: some results carry no date token after the source
            date = info[1] if len(info) > 1 else ''
            content = get_fullcontent(link)
            # distinct name from the result iterator (the original reused
            # `li` for both, which was confusing shadowing)
            row = [title, link, source, date, content]
            save_to_csv('%s' % d, row)
            save_to_text('%s.txt' % d, row)
    finally:
        # close the browser even on error — the original leaked one Chrome
        # process per keyword by never calling quit()
        driver.quit()