Example #1
0
 def saveConf(self):
     """Persist the current in-memory crawler configuration to a local JSON file.

     TODO: upload to a server / store in a database to provide a monitoring
     query point; prompt for a config name, execution-control conditions,
     schedule, etc. and record them in the database.
     """
     target_name = "testxuexi111.json"
     writer = File_file()
     # Serialize the config object as plain text and write it out.
     writer.save_source(path="./configs",
                        file=target_name,
                        all_the_text=str(self.conf))
Example #2
0
def read_detial(url, i):
    """Fetch one article detail page, archive its HTML, parse it, append a CSV row.

    Args:
        url: Detail-page URL; its last path segment is reused as the HTML
            file name.
        i: Sequence number of this page, recorded in the CSV row.

    Side effects: writes the raw HTML under ``path`` and appends one line to
    ``path + "/data.csv"``.
    """
    detial_html = htmlSource.get_html(url_p=url, type_p='rg')
    # Archive the raw HTML; the file name is the URL's last path segment.
    files = File_file()
    file_name = url.split('/')[-1]

    files.save_source(path=path,
                      file=file_name,
                      all_the_text=detial_html,
                      encoding_='utf-8')
    # Extraction rules: (field name, XPath, mode[, join separator]).
    colum = [
        ('title', '//h1[@class="articleHead"]//text()', 'l'),
        ('pushDate',
         '//div[@class="info"]//span[@class="infoA"][@id="pubtime_baidu"]//text()',
         'l'),
        ('content', '//div[@class="articleText"]//text()', 'sarra', ','),
    ]
    result = rule.html_content_analysis_detial(html_text=detial_html,
                                               column=colum,
                                               url=url)
    print(result)
    # Append one row: web_name (site), web_url, title, publish date, content,
    # sequence number, crawl timestamp.
    csv = Csv_base()
    csv.write_csv_file_line(file_path=path + "/data.csv",
                            str=[
                                '参考消息', url, result[0][1], result[1][1],
                                result[2][1], i,
                                time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(time.time()))
                            ])
Example #3
0
 def crawlerDetail(self, confs, url=''):
     """Crawl a single detail page per *confs* and write the result as one JSON line.

     Args:
         confs: Crawler rule configuration passed through to Rule.crawler_detail.
         url: Page URL to crawl (defaults to '').
     """
     extractor = Rule()
     detail = extractor.crawler_detail(confs=confs, url=url)
     writer = File_file()
     print(detail)
     # One record per line, newline-terminated.
     writer.save_source(path='../data/',
                        file='xuexi111Detail.json',
                        all_the_text=str(detail) + '\n')
Example #4
0
def read_detial(url, i):
    """Download one detail page and store its raw HTML as ``<i>.json`` under ``path``.

    Args:
        url: Page URL to download.
        i: Sequence number used to build the output file name.
    """
    page_html = htmlSource.get_html(url_p=url, type_p='rg')
    # Persist the page verbatim, numbered by crawl order.
    writer = File_file()
    writer.save_source(path=path,
                       file="%d.json" % i,
                       all_the_text=page_html,
                       encoding_='utf-8')
Example #5
0
#!/usr/bin/env python
# coding=utf-8

from common.inc_csv import Csv_base
from common.inc_file import File_file
from lxml import html
import re

# Module-level helper instances shared by the functions below.
csv = Csv_base()  # CSV read/write helper
file = File_file()  # file-save helper


def replaceStr(a):
    """Strip Baidu-Baike boilerplate from a scraped text snippet.

    Args:
        a: Raw text extracted from a Baidu Baike page.

    Returns:
        The text with the collect/vote counter block and all known fixed
        boilerplate phrases removed, stripped of surrounding whitespace.

    Prints the text before and after cleaning (debug trace kept from the
    original implementation).
    """
    print(a)
    # Counter block like "收藏查看我的收藏12有用...3已投票4" — drop it entirely.
    a = re.sub(r"收藏查看我的收藏(\d+)有用(.*?)(\d+)已投票(\d+)", "", a, flags=re.S)
    # Blank out fixed boilerplate phrases one by one, stripping after each
    # pass — same replace/strip sequence as the original chained calls,
    # without the redundant per-step str() conversions.
    for phrase in (
        "编辑锁定",
        "讨论999",
        "本词条缺少概述图,补充相关内容使词条更完整,还能快速升级,赶紧来编辑吧!",
        "百度百科内容由网友共同编辑,如您发现自己的词条内容不准确或不完善,欢迎使用本人词条编辑服务(免费)参与修正。",
        "立即前往 >>",
    ):
        a = a.replace(phrase, " ").strip()
    print(a)
    return a


if __name__ == '__main__':

    csv_data_path = "../../data/百科候选关键词.csv"
    rows = csv.read_csv_file(csv_data_path)
    for row in rows:
        try:
Example #6
0
# -- coding: UTF-8 --

# 导入测试实例文件数据用,
from common.inc_csv import Csv_base

# DateUtils用日期计算工具
from datetime import datetime,  timedelta, date,timezone
from time import time, ctime, localtime, strftime, strptime, mktime
import re,json,calendar
# 寿星天文历 农历转公历 公历转农历
#import sxtwl
from common.inc_file import File_file

# Module-level helper instances for test-fixture CSV input and file output.
csv = Csv_base()  # CSV read/write helper
file = File_file()  # file-save helper

class DateUtils:

    cn_num = {
        '〇': '0', '一': '1', '二': '2', '三': '3', '四': '4', '五': '5', '六': '6', '七': '7', '八': '8', '九': '9',
        '零': '0', '壹': '1', '贰': '2', '叁': '3', '肆': '4', '伍': '5', '陆': '6', '柒': '7', '捌': '8', '玖': '9',
        '貮': '2', '两': '2', '俩': '2', '十': '',
    }
    cn_date_day = {
        "大前天": -3,
        "大后天": 3,
        "明天": 1,
        "后天": 2,
        "昨天": -1,
        "今天": 0,
        "前天": -2,
Example #7
0
# -- coding: UTF-8 --

import requests
from lxml import etree
from common.inc_file import File_file

file = File_file()
# The ``n`` query parameter selects the year on the solar-term lookup page.
urlx = 'https://jieqi.supfree.net/cntv.asp?n='
session = requests.Session()

# Scrape the solar-term table for each year 833..5000 and append one
# {year: {term: date}} record per line to ./jieqi.json.
for year in range(833, 5001):
    url = urlx + str(year)
    resp = session.get(url=url)
    # The page is served in the legacy GB2312 encoding.
    resp.encoding = 'gb2312'
    rep = etree.HTML(resp.text)
    a = rep.xpath('//table/tr/td/a/text()')  # term names
    b = rep.xpath('//table/tr/td/text()')    # date cells
    # zip() stops at the shorter list, reproducing the original
    # "len(b) > i" bounds check: term names without a date cell are skipped.
    # The last 9 characters of each cell are dropped (kept from original).
    dicall = {
        str(year): {name: str(cell).strip()[:-9] for name, cell in zip(a, b)}
    }
    file.save_source(path="./",
                     file="jieqi.json",
                     all_the_text=str(dicall) + "\n")