コード例 #1
0
ファイル: zaoping_alerts.py プロジェクト: LinZou2018/reptile
def download(reponse):
    """Extract alert entries from a fetched page and hand each one to ``storage``.

    The page text is scanned for
    ``<div class="control clear data-bottom-id"> ... <!----></div>`` fragments.
    Each fragment is parsed with ``etree.HTML`` and its title, body text and
    time are read via XPath.

    Args:
        reponse: Object whose ``text`` attribute holds the page HTML.

    Returns:
        True as soon as ``title_find`` returns a truthy value for a title
        (the remaining fragments are not processed); otherwise None once
        every fragment has been passed to ``storage``.

    Raises:
        IndexError: If a fragment lacks one of the nodes the XPath
            expressions look for.
    """
    fragment_re = re.compile(
        r'<div class="control clear data-bottom-id">[\s\S]*?<!----></div>')
    for fragment in fragment_re.findall(reponse.text):
        # An extra "</div>" is appended before parsing -- presumably to
        # re-close the outer div that the lazy match cuts off.
        tree = etree.HTML(fragment + "</div>")

        # Collapse whitespace runs; every word is followed by one space.
        title_words = tree.xpath(
            '//div[@class="content"]/a[1]/text()')[0].split()
        title = "".join(word + " " for word in title_words)
        if title_find(title, come_from="zaoping_alerts"):
            return True
        print("zaoping_alerts")

        body_words = tree.xpath(
            '//div[@class="content"]/a[2]/text()')[0].split()
        text = "".join(word + " " for word in body_words)

        time_parts = tree.xpath('//div[@class="time"]/text()')[0].split()
        if len(time_parts) == 1:
            # NOTE(review): UTCTime receives the one-element list, not the
            # string inside it -- confirm that is what UTCTime expects.
            date = UTCTime(time_parts)
        else:
            # Multi-token timestamps are re-joined with two spaces each.
            date = "".join(part + "  " for part in time_parts)
        storage(title, date, text)
コード例 #2
0
def divide(i):
    """Parse one alert HTML fragment and store it via ``storage``.

    Args:
        i: HTML fragment (as matched from the page) for a single alert.

    Returns:
        None. Returns early, without storing, when ``title_find`` returns
        a truthy value for the extracted title.

    Any exception raised while parsing (e.g. a missing ``<span>``/``<p>``
    node or a text without a 【...】 title) is passed to ``mistake`` instead
    of propagating.
    """
    try:
        print("tuoniaox_alerts")
        # Turn the regex-matched HTML fragment into a parsed document.
        html = etree.HTML(i)
        # Publication time as shown on the page.
        timeout = html.xpath('//span/text()')[0]
        # Body text, whitespace-normalised (each word followed by a space).
        words = html.xpath('//p/text()')[0].split()
        text = "".join(word + " " for word in words)
        # The title is the first 【...】 group; there is no explicit id, so
        # the title is what title_find is given to check.
        title = re.findall(r"【[\s\S]*?】", text)[0]
        if title_find(title, come_from="tuoniaox_alerts"):
            return
        # Date mentioned in the text. Month and day may each have one or two
        # digits; a single-digit pattern would truncate "12月5日" to "2月5日"
        # and miss "1月25日" entirely.
        dates = re.findall(r"\d{1,2}月\d{1,2}日", text)
        accurate = dates[0] if dates else ""
        author = "鸵鸟区块链:https://www.tuoniaox.com/"
        # A link in the fragment marks the item as compiled from another
        # source. xpath() returns a list, so take the first href rather than
        # concatenating the list itself (which raises TypeError).
        links = html.xpath('//a/@href')
        if links:
            source = "负责编译--原文:" + links[0]
        else:
            source = "鸵鸟区块链--快讯"
        storage(title, author, timeout, accurate, source, text)
    except Exception as err:
        mistake(url="https://www.tuoniaox.com/", err=err)
コード例 #3
0
def download(object):
    """Split one alert element into time, title and body, then store it.

    The element is serialised to plain text and split on whitespace. The
    first token is taken as the time, and the tokens between it and the
    last three are re-joined and matched for a 【title】 followed by a body
    that ends at the last "。".

    Args:
        object: Element accepted by ``etree.tostring``.

    Returns:
        True when ``title_find`` returns a truthy value for the title (nothing
        is stored); otherwise None after calling ``storage``.

    Raises:
        IndexError: If the text is empty or does not match the
            title/body pattern.
    """
    raw = etree.tostring(object, method="text", encoding="utf8")
    tokens = raw.decode("utf8").split()
    timeout = tokens[0]
    # Drop the leading time token and the three trailing tokens.
    joined = "".join(token + " " for token in tokens[1:-3])
    matches = re.findall(r'(【[\s\S]*?】)([\s\S]*。)', joined)
    title, body = matches[0]
    if title_find(title, come_from="btc798_alerts"):
        return True
    storage(title, timeout, body)
コード例 #4
0
def download(text, reload):
    """Pick title, body and time out of pre-split alert tokens and store them.

    Args:
        text: Sequence of tokens for one alert. The first token is the
            title, the last two tokens form the time, and everything in
            between is the body.
        reload: Counter compared against 3 when ``title_find`` returns a
            truthy value.

    Returns:
        True when ``title_find`` is truthy and ``reload == 3``; None in every
        other case (including after a successful ``storage`` call).
    """
    title = text[0]
    body_start = 1
    # Special case: this particular headline spans the first two tokens.
    if title == "马云:不支持比特币":
        title = title + " " + text[1]
        body_start = 2
    main = text[body_start:-2]

    if title_find(title, come_from="gongxiangcj_alerts"):
        if reload != 3:
            # NOTE(review): if reload is an int this only rebinds the local
            # name; the caller never sees the incremented value -- confirm
            # whether the count was meant to be returned.
            reload += 1
            return None
        return True

    timeout = text[-2] + " " + text[-1]
    storage(title, timeout, main)
コード例 #5
0
def download(reponse, url):
    """Parse every alert entry on the page and store each one.

    Args:
        reponse: Object whose ``text`` attribute holds the page HTML.
        url: Passed to ``mistake`` together with the exception if anything
            fails.

    Returns:
        True as soon as ``title_find`` returns a truthy value for an entry's
        title (later entries are skipped); otherwise None.

    Any exception raised while parsing is handed to ``mistake`` and not
    re-raised, which also ends processing of the remaining entries.
    """
    try:
        print("bibaodao_alerts")
        tree = etree.HTML(reponse.text)
        title_re = re.compile(r"【[\s\S]*?】")
        entries = tree.xpath(
            '/html/body/div/div[3]/div[2]/div/div/div/div[1]/div[2]/div')
        for entry in entries:
            raw = etree.tostring(entry, method="text", encoding="utf8")
            tokens = raw.decode("utf8").split()
            # First two tokens make up the time; the rest is the body.
            timeout = tokens[0] + " " + tokens[1]
            main = "".join(token + " " for token in tokens[2:])
            # The title is the first 【...】 group inside the body.
            title = title_re.findall(main)[0]
            if title_find(title, come_from="bibaodao_alerts"):
                return True
            storage(title, timeout, main)
    except Exception as err:
        mistake(url, err)
コード例 #6
0
def download(reponse):
    """Parse the entries under ``#panel1`` and store each one.

    Args:
        reponse: Object whose ``text`` attribute holds the page HTML.

    Returns:
        None. Processing stops at the first entry for which ``title_find``
        returns a truthy value.

    Raises:
        IndexError: If an entry yields fewer than two text tokens.
    """
    print("ihuoqiu_alerts")
    tree = etree.HTML(reponse.text)
    # Tokens of the current local time in asctime form; the
    # second-to-last token (the clock time) is replaced per entry.
    now = time.localtime(time.time())
    stamp = time.asctime(now).split()
    for node in tree.xpath('//*[@id="panel1"]/div'):
        raw = etree.tostring(node, method="text", encoding="utf8")
        tokens = raw.decode("utf8").split()

        # Substitute the entry's own first token for the clock time.
        stamp[-2] = tokens[0]
        timeout = "".join(part + " " for part in stamp)

        title = tokens[1]
        if title_find(title, come_from="ihuoqiu_alerts"):
            break

        # Body: drop the last two tokens when the second-to-last one is
        # the "[查看原文]" marker, otherwise only the last token.
        trailing = 2 if tokens[-2] == "[查看原文]" else 1
        main = tokens[2:-trailing]
        storage(title, timeout, main)