Exemple #1
0
def RK_LASD(x, pt):
    """Scrape four ranking sub-pages for ``x`` into a single workbook.

    The workbook ``Results/RK2_201<x>.xls`` is initialised through
    ``initExcel_rk2``.  Page ``i`` (0..3) lives at
    ``URL + str(x) + '_' + str(i) + '.html'`` and is matched against the
    regular expression ``pt[i]`` compiled with ``re.S``.  Matches from
    page 0 are written starting at column 0; matches from every later
    page are shifted right by ``RC2[i] + 1`` columns.  Rows start at 3.

    Args:
        x: Value appended (via ``str``) to the file name and the URL.
        pt: Indexable collection holding one pattern string per page.

    Raises:
        IndexError: If a page produces no match, because the first match
            is used to determine how many columns to write.
    """
    name = 'Results/RK2_201' + str(x) + '.xls'
    print('Writing to ' + name + ' ... ...')
    initExcel_rk2(name)
    base_url = URL + str(x) + '_'
    for page in range(4):
        html = mr.getOnePage(base_url + str(page) + '.html', 'utf-8')
        matches = re.compile(pt[page], re.S).findall(html)
        width = len(matches[0])
        # Only pages after the first are offset; RC2 is not consulted for page 0.
        col_shift = 0 if page == 0 else RC2[page] + 1
        for row, match in enumerate(matches, start=3):
            for col in range(width):
                mr.writeToExcel(name, row, col + col_shift, match[col])
            # Progress output: 1-based index of the record just written.
            print(row - 2)
Exemple #2
0
def QS0():
    """Download the linked QS result file for each year from 2010 to 2020.

    For every year the results page on universityrankings.ch is fetched,
    the first match of ``PATTERN`` (compiled with ``re.S``) is treated as
    a site-relative link, and the linked resource is stored through
    ``mr.saveOneFile`` under the name ``QS20<yy>.csv``.

    Raises:
        IndexError: If ``PATTERN`` does not match a results page.
    """
    pattern = re.compile(PATTERN, re.S)
    for i in range(11):
        # Two-digit year shared by the query string and the output file
        # name; the file name used to reference an undefined name.
        year = str(10 + i)
        url = ('https://www.universityrankings.ch/results?ranking=QS&region=Asia&year=20'
               + year + '&q=China+')
        html = mr.getOnePage(url, 'utf-8')
        result = re.findall(pattern, html)
        url = 'https://www.universityrankings.ch' + result[0]
        file_name = 'QS20' + year + '.csv'
        mr.saveOneFile(file_name, url)
Exemple #3
0
def XYH2017():
    """Scrape the table at dxsbb.com/news/1383.html into an Excel workbook.

    The workbook ``Results/XYH2017-2018.xls`` is initialised with
    ``ROW2017``.  Each matched table row has five captured cells and is
    written to consecutive sheet rows starting at 1:

    * column 1 keeps only the CJK characters of the cell,
    * columns 0 and 4 keep the first number found in the cell,
    * columns 2 and 3 are written unchanged.

    Raises:
        IndexError: If column 0 or 4 of a row contains no number.
    """
    name = 'Results/XYH2017-2018.xls'
    print('Writing to ' + name + ' ... ...')
    mr.initExcel(ROW2017, name)
    html = mr.getOnePage('https://www.dxsbb.com/news/1383.html', 'gbk')
    row_pattern = re.compile(
        '<tr height="19"><td x:num="(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td x:num="(.*?)</td></tr>',
        re.S)
    cjk_pattern = re.compile("[\u4e00-\u9fa5]+", re.S)
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    number_pattern = re.compile(r"\d+\.?\d*", re.S)
    for row, item in enumerate(row_pattern.findall(html), start=1):
        for col in range(5):
            if col == 1:
                value = "".join(cjk_pattern.findall(item[col]))
            elif col in (0, 4):
                value = number_pattern.findall(item[col])[0]
            else:
                value = item[col]
            mr.writeToExcel(name, row, col, value)
        print(row)
Exemple #4
0
def Times(x):
    """Scrape two pages for ``x`` into ``Results/Times201<x>.xls``.

    The workbook is initialised with ``ROW``.  The page at
    ``URL1 + str(x) + URL2`` is matched with ``PATTERN1`` and written
    from column 0; the page at ``URL1 + str(x) + URL3`` is matched with
    ``PATTERN2`` and written from column 6.  Both passes start at row 1.

    Args:
        x: Value appended (via ``str``) to the file name and the URLs.

    Raises:
        IndexError: If either page produces no match.
    """
    name = 'Results/Times201' + str(x) + '.xls'
    print('Writing to ' + name + ' ... ...')
    mr.initExcel(ROW, name)

    def write_page(page_url, pattern_text, first_col):
        # Fetch one page and copy every match into the sheet, one row each.
        html = mr.getOnePage(page_url, 'utf-8')
        rows = re.compile(pattern_text, re.S).findall(html)
        width = len(rows[0])
        for row_no, fields in enumerate(rows, start=1):
            for col in range(width):
                mr.writeToExcel(name, row_no, col + first_col, fields[col])
            print(row_no)

    write_page(URL1 + str(x) + URL2, PATTERN1, 0)
    write_page(URL1 + str(x) + URL3, PATTERN2, 6)
Exemple #5
0
def RK_ZHDX(x, row, pt):
    """Scrape one ranking page for ``x`` into ``Results/RK201<x>.xls``.

    Args:
        x: Value appended (via ``str``) to the file name and to ``URL``.
        row: Passed through to ``mr.initExcel`` when creating the workbook.
        pt: Regular-expression string, compiled with ``re.S``; every match
            becomes one sheet row, starting at row 1 and column 0.

    Raises:
        IndexError: If the page produces no match.
    """
    name = 'Results/RK201' + str(x) + '.xls'
    print('Writing to ' + name + ' ... ...')
    mr.initExcel(row, name)
    html = mr.getOnePage(URL + str(x) + '.html', 'utf-8')
    records = re.compile(pt, re.S).findall(html)
    width = len(records[0])
    for row_no, record in enumerate(records, start=1):
        for col in range(width):
            mr.writeToExcel(name, row_no, col, record[col])
        print(row_no)
Exemple #6
0
def Scholars():
    """Count three groups of titled scholars on the NENU page and record them.

    The page is rendered in headless Firefox and the text of the element
    ``wp_content_w8_0`` is split into sections by heading strings:

    * ``count[0]``: section between '院士' and '荣誉教授' (``mr.parse2``),
    * ``count[1]``: sum of three consecutive sections (``parse3``),
    * ``count[2]``: sum of four sections (``parse3``).

    The three totals are printed and written with ``writeToExcel`` to
    indices 4, 5 and 7.

    The browser handle is stored in the module-level ``driver`` while the
    page is open (presumably read by ``getEleById`` - confirm) and is
    always closed and removed again, even when loading or lookup fails.
    """
    count = [0, 0, 0]
    url = 'http://www.nenu.edu.cn/576/list.htm'
    global driver
    fireFoxOptions = webdriver.FirefoxOptions()
    fireFoxOptions.set_headless()
    driver = webdriver.Firefox(firefox_options=fireFoxOptions)
    try:
        driver.get(url)
        elements = getEleById('wp_content_w8_0').text
    finally:
        # Release the browser even if the page or the element is missing.
        driver.close()
        del driver
    count[0] = mr.parse2(elements, '院士', '荣誉教授')
    count[1] += parse3(elements, '教育部“长江学者奖励计划”特聘教授', '教育部“长江学者奖励计划”青年学者')
    count[1] += parse3(elements, '教育部“长江学者奖励计划”青年学者', '教育部“长江学者和创新团队发展计划”带头人')
    count[1] += parse3(elements, '教育部“长江学者和创新团队发展计划”带头人', '中国科学院“百人计划”')
    count[2] += parse3(elements, '国家“万人计划”哲学社会科学领军人才', '国家“万人计划”科技创新领军人才')
    count[2] += parse3(elements, '国家“万人计划”科技创新领军人才', '国家“万人计划”教学名师')
    count[2] += parse3(elements, '国家“万人计划”教学名师', '国家杰出青年科学基金获得者')
    count[2] += parse3(elements, '国家“万人计划”青年拔尖人才', '国家优秀青年科学基金获得者')
    print(count)
    writeToExcel(4, count[0])
    writeToExcel(5, count[1])
    writeToExcel(7, count[2])
def YangtzeRiverScholars():
    """Print each parsed entry and add one to ``count_3`` per entry.

    Entries come from ``parseOnePage_YangtzeRiverScholars`` applied to the
    page at ``/jsdw/rcjh/index.htm`` on the PKU mathematics site.
    """
    global count_3
    page = mr.getOnePage('http://www.math.lb.pku.edu.cn/jsdw/rcjh/index.htm')
    for scholar in parseOnePage_YangtzeRiverScholars(page):
        print(scholar)
        count_3 += 1
def Academician(offset):
    """Print each parsed entry of one index page and count it in ``count_2``.

    Args:
        offset: String spliced between ``index`` and ``.htm`` in the page
            URL (it is concatenated directly, so it must be a ``str``).
    """
    global count_2
    page_url = ('http://www.math.lb.pku.edu.cn/jsdw/zgkxyys/index'
                + offset + '.htm')
    page = mr.getOnePage(page_url)
    for member in parseOnePage_Academician(page):
        print(member)
        count_2 += 1
Exemple #9
0
def Library():
    """Record the holdings figure published on lib.csu.edu.cn.

    The text captured between '纸质文献总量' and '万余册' is read as a
    float, multiplied by 10000, truncated to ``int``, printed, and passed
    to ``writeToExcel(37, ...)``.

    Raises:
        IndexError: If the page does not contain the expected phrase.
        ValueError: If the captured text is not a number.
    """
    page = mr.getOnePage('http://lib.csu.edu.cn/bgjs.jhtml')
    captured = re.compile('纸质文献总量(.*?)万余册', re.S).findall(page)[0]
    count1 = int(float(captured) * 10000)
    print('中外文藏书合计: ' + str(count1))
    writeToExcel(37, count1)
def Library():
    """Record the holdings figure published on www.lib.sdu.edu.cn.

    The text captured between '馆藏纸质文献' and '万余册' is read as a
    float, multiplied by 10000, truncated to ``int``, printed, and passed
    to ``writeToExcel(37, ...)``.

    Raises:
        IndexError: If the page does not contain the expected phrase.
        ValueError: If the captured text is not a number.
    """
    page = mr.getOnePage('http://www.lib.sdu.edu.cn/page/about.html')
    captured = re.compile('馆藏纸质文献(.*?)万余册', re.S).findall(page)[0]
    count1 = int(float(captured) * 10000)
    print('中外文藏书合计: ' + str(count1))
    writeToExcel(37, count1)
Exemple #11
0
def RecruitmentProgram():
    """Print each parsed entry and add one to ``count_4`` per entry.

    Entries come from ``parseOnePage_RecruitmentProgram`` applied to the
    page at ``/jsdw/rcjh/index.htm`` on the PKU mathematics site.
    """
    global count_4
    page = mr.getOnePage('http://www.math.lb.pku.edu.cn/jsdw/rcjh/index.htm')
    for entry in parseOnePage_RecruitmentProgram(page):
        print(entry)
        count_4 += 1
Exemple #12
0
def Scholars():
    """Count occurrences of '中国科学院院士' on the JLU mathematics staff page.

    The number of non-overlapping occurrences is printed and passed to
    ``writeToExcel(4, ...)``.
    """
    page = mr.getOnePage('http://math.jlu.edu.cn/szdw/spys.htm')
    occurrences = re.compile('中国科学院院士', re.S).findall(page)
    count = len(occurrences)
    print('院士数量: ' + str(count))
    writeToExcel(4, count)
Exemple #13
0
def Professor():
    """Sum the per-department staff counts and record the totals.

    ``parseOnePage_Professor`` is called once per department code; each
    result is printed and folded into a running total with
    ``mr.listAdd``.  The final totals are printed and written with
    ``writeToExcel`` to indices 1, 2, 3, ... in order.
    """
    totals = [0, 0, 0]
    for department in ('sxyyysxx', 'xxyjskxx', 'glytjxx', 'gdsxjxyyjzx'):
        per_department = parseOnePage_Professor(department)
        print(per_department)
        totals = mr.listAdd(totals, per_department)
    print(totals)
    for index, value in enumerate(totals, start=1):
        writeToExcel(index, value)
Exemple #14
0
def Scholars():
    """Count names listed after '长江学者特聘教授:' on the BNU mathematics page.

    The text between that label and the next ``<br />`` is scanned for
    three-character runs (CJK, then CJK or whitespace, then CJK); the
    number of runs is printed and passed to ``writeToExcel(5, ...)``.

    Raises:
        IndexError: If the label is not found on the page.
    """
    page = mr.getOnePage('http://math.bnu.edu.cn/jzg/rcjh/index.htm')
    listing = re.compile('长江学者特聘教授:(.*?)<br />', re.S).findall(page)[0]
    name_pattern = re.compile(
        '([\u4E00-\u9FA5][\u4e00-\u9fa5\\s][\u4e00-\u9fa5])', re.S)
    count = len(name_pattern.findall(listing))
    print('长江学者人数: ' + str(count))
    writeToExcel(5, count)
Exemple #15
0
def main():
	"""Initialise the workbook, then run each scraper entry point in order.

	Every call below is executed unconditionally and sequentially; an
	exception in any of them stops the remaining ones.
	"""
	mr.initExcel()
	papers.papers()
	Peking_University.PKU()
	Sichuan_University.SCU()
	Fudan_University.FU()
	Central_South_University.CSU()
	# NOTE(review): the hyphen makes this parse as the subtraction
	# `Sun_Yat - sen_University.SYSU()`, not a call on one module; confirm
	# the intended module name (a hyphen cannot appear in an identifier).
	Sun_Yat-sen_University.SYSU()
	Shandong_University.SDU()
	University_of_Science_and_Technology_of_China.USTC()
	Shanghai_Jiao_Tong_University.SJTU()
	Northest_Normal_University.NENU()
	Jilin_University.JLU()
	Nankai_University.NKU()
	Capital_Normal_University.CNU()
	Beijing_Normal_University.BNU()
	Tsinghua_University.THU()
	Awards.AWD()
	Others.OTH()
Exemple #16
0
def Library():
    """Append two library figures from the PKU mathematics page to ``count_8``.

    Three values are captured, following the labels '现有纸版文献',
    '外文期刊' and '中文期刊'.  The first is appended as-is; the second and
    third are joined with ``+`` and appended as one value.

    Raises:
        IndexError: If the page does not match the expected layout.
    """
    global count_8
    page = mr.getOnePage('http://www.math.lb.pku.edu.cn/kxyj/ytsg/index.htm')
    layout = re.compile(
        '现有纸版文献.*?"font-size:16px">(.*?)</span>.*?外文期刊.*?"font-size:16px">(.*?)</span>.*?中文期刊.*?"font-size:16px">(.*?)</span>',
        re.S)
    holdings, foreign_journals, chinese_journals = layout.findall(page)[0]
    count_8.append(holdings)
    # NOTE(review): the captures are strings, so this is concatenation,
    # not an arithmetic sum - confirm that is what consumers expect.
    count_8.append(foreign_journals + chinese_journals)
Exemple #17
0
def parseOnePage_Professor(dep):
    """Return three staff counts for one CSU mathematics department.

    For each of the page names ``js``, ``fjs`` and ``js1`` under
    ``http://math.csu.edu.cn/szdw/<dep>/`` the number of occurrences of
    'font-size:9pt' is halved (rounding down).

    Args:
        dep: Department path segment inserted into the URL.

    Returns:
        A list of three ints, in the order ``js``, ``fjs``, ``js1``.
    """
    marker = re.compile('font-size:9pt', re.S)
    counts = []
    for position in ('js', 'fjs', 'js1'):
        page = mr.getOnePage(
            'http://math.csu.edu.cn/szdw/' + dep + '/' + position + '.htm')
        counts.append(len(marker.findall(page)) // 2)
    return counts
def Library():
    """Record holdings and e-journal figures from the USTC library page.

    Two numbers are captured: the one between '实体馆藏中外文书刊' and
    '万册', and the one between '中外文电子期刊近' and '万种'.  Each is
    multiplied by 10000, truncated to ``int``, printed, and written with
    ``writeToExcel`` to index 37 and 40 respectively.

    Raises:
        IndexError: If the page does not contain the expected phrases.
        ValueError: If a captured value is not a number.
    """
    page = mr.getOnePage(
        'http://lib.ustc.edu.cn/%e6%9c%ac%e9%a6%86%e6%a6%82%e5%86%b5/%e6%9c%ac%e9%a6%86%e7%ae%80%e4%bb%8b/')
    figures = re.compile(
        '实体馆藏中外文书刊(.*?)万册.*?中外文电子期刊近(.*?)万种', re.S).findall(page)[0]

    count1 = int(float(figures[0]) * 10000)
    print('中外文藏书合计: ' + str(count1))
    writeToExcel(37, count1)

    count2 = int(float(figures[1]) * 10000)
    print('中外文期刊种类: ' + str(count2))
    writeToExcel(40, count2)
def Professors():
    """Count three staff categories on the USTC mathematics teachers page.

    The page is rendered in headless Firefox and the text of one table
    cell (located by XPath) is split with ``mr.parse2`` into the sections
    '教  授'..'访问教授', '副教授'..'特任副研究员' and '讲师'..'博士后'.  The
    three counts are printed and written with ``writeToExcel`` to
    indices 1, 2 and 3.

    The browser handle is stored in the module-level ``driver`` while the
    page is open (presumably read by ``getEleByXpath`` - confirm) and is
    always closed and removed again, even when loading or lookup fails.
    """
    url = 'http://math.ustc.edu.cn/new/teachers.php'
    global driver
    fireFoxOptions = webdriver.FirefoxOptions()
    fireFoxOptions.set_headless()
    driver = webdriver.Firefox(firefox_options=fireFoxOptions)
    try:
        driver.get(url)
        element = getEleByXpath(
            '/html/body/table[3]/tbody/tr/td[3]/table[2]/tbody/tr[3]/td').text
    finally:
        # Release the browser even if the page or the element is missing.
        driver.close()
        del driver
    count = [0, 0, 0]
    count[0] = mr.parse2(element, '教  授', '访问教授')
    count[1] = mr.parse2(element, '副教授', '特任副研究员')
    count[2] = mr.parse2(element, '讲师', '博士后')
    print(count)
    for i, item in enumerate(count):
        writeToExcel(i + 1, item)
Exemple #20
0
def Library():
    """Record holdings and e-journal figures from the NENU library page.

    The first captured value (a decimal number before '万册') is
    multiplied by 10000, truncated to ``int``, printed, and written to
    index 37.  The second captured value (an integer inside a ``span``
    after '电子期刊') is printed and written to index 40 as ``int``.

    Raises:
        IndexError: If the page does not match the expected layout.
    """
    url = 'http://www.library.nenu.edu.cn/Menu/AboutUs/BGGK/TSGJJ.aspx'
    html = mr.getOnePage(url)
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    pattern = re.compile(
        r'藏书总量约.*?(\d+\.\d+).*?万册。图书.*?电子期刊.*?>(\d+)</span>.*?种,订购', re.S)
    items = re.findall(pattern, html)[0]
    count1 = int(float(items[0]) * 10000)
    print('中外文藏书合计: ' + str(count1))
    writeToExcel(37, count1)
    print('中外文期刊种类: ' + items[1])
    writeToExcel(40, int(items[1]))
Exemple #21
0
def Library():
    """Record holdings and periodical figures from the Fudan library page.

    Three ``span`` contents are captured, following the labels
    '馆藏纸本文献资源约', '中文报刊' and '外文报刊'.  The first is
    multiplied by 10000, truncated to ``int``, and written to index 37;
    the other two are added as integers and written to index 40.  Both
    results are printed.

    Raises:
        IndexError: If the page does not match the expected layout.
        ValueError: If a captured value is not numeric.
    """
    page = mr.getOnePage('http://www.library.fudan.edu.cn/60/list.htm')
    layout = re.compile(
        '馆藏纸本文献资源约</span><span lang="EN-US" style="color:#333333;">(.*?)</span>.*?中文报刊</span><span lang="EN-US" style="color:#333333;">(.*?)</span>.*?外文报刊</span><span lang="EN-US" style="color:#333333;">(.*?)</span>',
        re.S)
    figures = layout.findall(page)[0]

    count1 = int(float(figures[0]) * 10000)
    print('中外文藏书合计: ' + str(count1))
    writeToExcel(37, count1)

    count2 = int(figures[1]) + int(figures[2])
    print('中外文期刊种类: ' + str(count2))
    writeToExcel(40, count2)
def Library():
    """Record holdings, e-journal and database figures from the Tsinghua page.

    Three values are captured: total holdings (before '万册'), number of
    databases (after '各类数据库') and e-journals (before '万种').
    Holdings and e-journals are multiplied by 10000 and truncated to
    ``int``.  The values are printed and written with ``writeToExcel`` to
    indices 37 (holdings), 40 (e-journals) and 39 (databases).

    Raises:
        IndexError: If the page does not match the expected layout.
        ValueError: If a captured value is not numeric.
    """
    url = 'http://lib.tsinghua.edu.cn/about/collection.html'
    html = mr.getOnePage(url)
    # The parentheses around '件' are literal page text.  Left unescaped
    # they formed an extra capture group, which shifted the group indices
    # used below; the character classes accept ASCII or full-width forms.
    pattern = re.compile(
        r'实体馆藏总量约(.*?)万册[((]件[))].*?各类数据库(\d+)个;电子期刊(.*?)万种', re.S)
    items = re.findall(pattern, html)[0]
    count1 = int(float(items[0]) * 10000)
    print('中外文藏书合计: ' + str(count1))
    writeToExcel(37, count1)
    count2 = int(float(items[2]) * 10000)
    print('中外文期刊种类: ' + str(count2))
    writeToExcel(40, count2)
    print('购买数据库数量: ' + items[1])
    writeToExcel(39, int(items[1]))
Exemple #23
0
def Projects():
    """Count research projects in two groups on the NENU page and record them.

    ``parse4`` is applied to four sections of the page, each delimited by
    a pair of heading strings.  Two section counts are summed into the
    first total (written to index 25) and two into the second total
    (written to index 26).  Both totals are printed.
    """
    page = mr.getOnePage('http://www.nenu.edu.cn/273/list.htm')

    national = 0
    national += parse4(page, '由国家社科基金资助设立的重大项目:', '由教育部社科司资助设立的重大课题:')
    national += parse4(page, '由国家自然科学基金资助的、面向世界科学前沿的重大基础研究项目,如:',
                       '由科技部资助的以国家重大需求为导向的重大科学问题研究项目(含课题),如:')

    provincial = 0
    provincial += parse4(page, '由教育部社科司资助设立的重大课题:', '科技项目')
    provincial += parse4(page, '由科技部资助的以国家重大需求为导向的重大科学问题研究项目(含课题),如:', '')

    print('国家科研项目数', national)
    writeToExcel(25, national)
    print('省部科研项目数', provincial)
    writeToExcel(26, provincial)
Exemple #24
0
def Library():
    """Record holdings, periodical and database figures from the JLU page.

    Five integers are captured.  The first is multiplied by 10000 and
    written to index 37; the second and third are summed and written to
    index 40; the fourth and fifth are summed and written to index 39.
    All three results are printed.

    Raises:
        IndexError: If the page does not match the expected layout.
    """
    url = 'http://lib.jlu.edu.cn/portal/about/about.aspx'
    html = mr.getOnePage(url)
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    pattern = re.compile(
        r'各类纸质书刊(\d+)万册,其中.*?订购中文期刊(\d+)种,外文期刊(\d+)种,报.*?西文文献数据库(\d+)种,中文数据库(\d+)种,中', re.S)
    items = re.findall(pattern, html)[0]
    count1 = int(float(items[0]) * 10000)
    print('中外文藏书合计: ' + str(count1))
    writeToExcel(37, count1)
    count2 = int(items[1]) + int(items[2])
    print('中外文期刊种类: ' + str(count2))
    writeToExcel(40, count2)
    count3 = int(items[3]) + int(items[4])
    print('购买数据库数量: ' + str(count3))
    writeToExcel(39, count3)
Exemple #25
0
def Library():
    """Record holdings, e-journal and database figures from the BNU page.

    Three values are captured.  The first two are multiplied by 10000,
    truncated to ``int``, and written to indices 37 and 40; the third is
    written to index 39 as ``int``.  All three are printed.

    Raises:
        IndexError: If the page does not match the expected layout.
        ValueError: If the first captured value is not a number.
    """
    url = 'http://www.lib.bnu.edu.cn/content/guan-chang-ji-yu'
    html = mr.getOnePage(url)
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    pattern = re.compile(
        r'纸本文献总量达(.*?)万余册,中外文全文电子期刊(\d+)万余种,中外文.*?引进中外文数据库(\d+)个,自', re.S)
    items = re.findall(pattern, html)[0]
    count1 = int(float(items[0]) * 10000)
    print('中外文藏书合计: ' + str(count1))
    writeToExcel(37, count1)
    count2 = int(float(items[1]) * 10000)
    print('中外文期刊种类: ' + str(count2))
    writeToExcel(40, count2)
    print('购买数据库数量: ' + items[2])
    writeToExcel(39, int(items[2]))
Exemple #26
0
def ResearchAward():
    """Print and count the entries found by three award parsers.

    One page is fetched and handed to ``parseOnePage_ResearchAward_1``,
    ``_2`` and ``_3`` in turn.  Every entry is printed; entries from the
    three parsers increment ``count_5``, ``count_6`` and ``count_7``
    respectively.
    """
    global count_5, count_6, count_7
    page = mr.getOnePage('http://www.math.lb.pku.edu.cn/kxyj/kyjl/index.htm')

    for award in parseOnePage_ResearchAward_1(page):
        print(award)
        count_5 += 1

    for award in parseOnePage_ResearchAward_2(page):
        print(award)
        count_6 += 1

    for award in parseOnePage_ResearchAward_3(page):
        print(award)
        count_7 += 1
Exemple #27
0
def Professor(offset):
    """Print each parsed staff entry and tally it by position in ``count_1``.

    Slots 0, 1 and 2 of ``count_1`` count the positions '教授', '副教授'
    and '讲师'; any other position is counted in slot 3.

    Args:
        offset: Value converted with ``str`` and spliced between ``index``
            and ``.htm`` in the page URL.
    """
    global count_1
    page_url = ('http://www.math.lb.pku.edu.cn/jsdw/js_20180628175159671361/index'
                + str(offset) + '.htm')
    slot_by_position = {'教授': 0, '副教授': 1, '讲师': 2}
    for person in parseOnePage_Professor(mr.getOnePage(page_url)):
        print(person)
        count_1[slot_by_position.get(person['position'], 3)] += 1
Exemple #28
0
def Professor():
    """Record staff figures quoted in a sentence on the Nankai mathematics page.

    Four integers are captured, in order: teachers, professors, holders
    of a doctoral degree, and academicians.  The professor and
    academician counts are written unchanged (as captured strings) to
    indices 1 and 4.  The doctoral share ``doctorates / teachers`` is
    formatted as a percentage with two decimals and written to index 8.
    All three values are printed.

    Raises:
        IndexError: If the page does not match the expected sentence.
        ZeroDivisionError: If the captured teacher count is 0.
    """
    url = 'http://sms.nankai.edu.cn/5542/list.htm'
    html = mr.getOnePage(url)
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    pattern = re.compile(
        r'数学学科现有教师(\d+)人,教授(\d+)人、博士.*?博士学位的(\d+)人。其中,中国科学院院士(\d+)人、第三', re.S)
    items = re.findall(pattern, html)[0]
    count1 = items[1]
    print('教授数量: ' + str(count1))
    writeToExcel(1, count1)
    count2 = int(items[2]) / int(items[0])
    count2 = '%.2f%%' % (count2 * 100)
    print('教师博士数量: ' + str(count2))
    writeToExcel(8, count2)
    count3 = items[3]
    print('院士数量: ' + str(count3))
    writeToExcel(4, count3)
Exemple #29
0
def ResearchAward():
    """Record award and monograph figures quoted on the NENU research page.

    Four integers are captured from one passage.  The third and fourth
    are summed and written to index 9, the second is written to index 10,
    and the first is written to index 19.  Each value is printed with its
    label.

    Raises:
        IndexError: If the page does not match the expected passage.
    """
    count = [0, 0, 0]
    url = 'http://www.nenu.edu.cn/275/list.htm'
    html = mr.getOnePage(url)
    # The closing parenthesis after '奖2项' is literal page text.  Left
    # unescaped it made the pattern fail to compile (unbalanced
    # parenthesis); the character class accepts ASCII or full-width ')'.
    pattern = re.compile(
        r'学术专著(\d+)部,获得省部级科研奖励(\d+)项。6部著.*?文库》;(\d+)项成果获得全国高等.*?奖2项[))];(\d+)项成果获全国教育',
        re.S)
    items = re.findall(pattern, html)[0]
    count[0] = int(items[2]) + int(items[3])
    count[1] = int(items[1])
    count[2] = int(items[0])
    print('国家奖', count[0])
    writeToExcel(9, count[0])
    print('省部级奖', count[1])
    writeToExcel(10, count[1])
    print('学术专著', count[2])
    writeToExcel(19, count[2])
Exemple #30
0
def WSL2(row, x, url, pt):
    """Scrape one dxsbb.com news page into ``Results/WSL201<x>.xls``.

    Every match of ``pt`` becomes one sheet row, starting at row 1 and
    column 0.  Column 1 keeps only the CJK characters of its cell; all
    other columns are written unchanged.

    Args:
        row: Passed through to ``mr.initExcel`` when creating the workbook.
        x: Value appended (via ``str``) to the file name.
        url: Page name appended to ``https://www.dxsbb.com/news/``.
        pt: Regular-expression string, compiled with ``re.S``.

    Raises:
        IndexError: If the page produces no match.
    """
    name = 'Results/WSL201' + str(x) + '.xls'
    print('Writing to ' + name + ' ... ...')
    mr.initExcel(row, name)
    html = mr.getOnePage('https://www.dxsbb.com/news/' + url, 'gbk')
    records = re.compile(pt, re.S).findall(html)
    width = len(records[0])
    cjk_only = re.compile("[\u4e00-\u9fa5]+", re.S)
    for row_no, record in enumerate(records, start=1):
        for col in range(width):
            cell = record[col]
            if col == 1:
                cell = "".join(cjk_only.findall(cell))
            mr.writeToExcel(name, row_no, col, cell)
        print(row_no)