Esempi in Python per lineNotify, esempi in Python per aws_linenotify.lineNotify

Esempio n. 1

0

Mostra file

File: storm_getView_2.0.py Progetto: drbrhbym/ec2_news_crawler

def getNewsView(urlQueue):
    while True:
        try:
            # 不阻塞的讀取佇列資料
            news_url = urlQueue.get_nowait()
            i = urlQueue.qsize()
        except Exception as e:
            break
        #print('Current Thread Name %s, Url: %s ' % (threading.currentThread().name, news_url))

        ## 爬蟲程式內容
        # News Tag轉換表
        tag_dict = {"財經": "finance",
                    "房地產": "finance",
                    "國內": "local",
                    "國際": "international",
                    "中港澳": "international",
                    "政治": "politics",
                    "公共政策": "politics",
                    "公民運動": "politics",
                    "風生活": "life",
                    "風攝影": "life",
                    "品味生活": "life",
                    "運動": "sports",
                    "評論": "forum",
                    "軍事": "military",
                    "科技": "technology",
                    "藝文": "arts",
                    "影音": "entertainment",
                    "歷史": "history",
                    "調查": "research"}

        news_response = urlopen(news_url)
        news_html = BeautifulSoup(news_response)

        artical_number = news_url.split("/")[-1]
        view_response = urlopen("https://service-pvapi.storm.mg/pvapi/get_pv/" + artical_number)
        view_html = BeautifulSoup(view_response)
        news_view = json.loads(view_html.text)["total_count"]

        news_tag = news_html.find("a", class_="tags_link").text

        #print("正在處理:", news_url)

        # 將新聞觀看數放入佇列
        try:
            viewQueue.put({"id": "storm-" + tag_dict[news_tag] + "-" + artical_number,
                           "news_link": news_url,
                           "view": news_view,
                           "time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M")})
        except KeyError as e:
            lineNotify("Got KeyError: " + str(e))

Esempio n. 2

0

Mostra file

File: usage_detect_2.0.py Progetto: drbrhbym/ec2_news_crawler

import os
from aws_linenotify import lineNotify

if os.path.exists("usage_output.txt"):
    with open("usage_output.txt", "r", encoding="utf-8") as f:
        output = f.read().split(" ")[8]
        #print(output)
        lineNotify("Disk usage " + output + " used!")

Esempio n. 3

0

Mostra file

File: storm_getContent_2.0.py Progetto: drbrhbym/ec2_news_crawler

def getNewsContent(urlQueue):
    while True:
        try:
            # 不阻塞的讀取佇列資料
            news_url = urlQueue.get_nowait()
            i = urlQueue.qsize()
        except Exception as e:
            break
        #print('Current Thread Name %s, Url: %s ' % (threading.currentThread().name, news_url))

        ## 爬蟲程式內容
        # News Tag轉換表
        tag_dict = {
            "財經": "finance",
            "房地產": "finance",
            "國內": "local",
            "國際": "international",
            "中港澳": "international",
            "政治": "politics",
            "公共政策": "politics",
            "公民運動": "politics",
            "風生活": "life",
            "風攝影": "life",
            "品味生活": "life",
            "運動": "sports",
            "評論": "forum",
            "軍事": "military",
            "科技": "technology",
            "藝文": "arts",
            "影音": "entertainment",
            "歷史": "history",
            "調查": "research"
        }

        news_response = urlopen(news_url)
        news_html = BeautifulSoup(news_response)
        news_tag = news_html.find("a", class_="tags_link").text
        news_title = news_html.find("h1", id="article_title").text

        artical_number = news_url.split("/")[-1]
        view_response = urlopen(
            "https://service-pvapi.storm.mg/pvapi/get_pv/" + artical_number)
        view_html = BeautifulSoup(view_response)
        news_view = json.loads(view_html.text)["total_count"]

        news_create_time = news_html.find("span", class_="info_time").text

        artical = news_html.find("div", class_="article_content_inner")
        content = []
        for p in artical.find_all("p"):
            content.append(p.text)
        news_content = "".join(content)

        news_keyword = []
        key_word = news_html.find_all("a", class_="tag tags_content")
        for word in key_word:
            news_keyword.append(word.text)

        #print("正在處理:", news_url)

        # 將新聞內容放入佇列
        try:
            newsQueue.put({
                "id":
                "storm-" + tag_dict[news_tag] + "-" + artical_number,
                "news_link":
                news_url,
                "news_title":
                news_title,
                "news_create_time":
                news_create_time,
                "news_content":
                news_content,
                "news_keyword":
                news_keyword,
                "news_tag":
                news_tag,
                "news_view": [{
                    "view": news_view,
                    "time": news_create_time
                }]
            })
        except KeyError as e:
            lineNotify("Got KeyError: " + str(e))

Esempio n. 4

0

Mostra file

File: storm_getContent_2.0.py Progetto: drbrhbym/ec2_news_crawler

                "news_view": [{
                    "view": news_view,
                    "time": news_create_time
                }]
            })
        except KeyError as e:
            lineNotify("Got KeyError: " + str(e))

        # 爲了突出效果，設定延時
        #time.sleep(1)


if __name__ == "__main__":

    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    lineNotify("Content of news started updating " + now)

    # 開啟要爬的新聞網址檔案
    while True:
        if os.path.exists("update_storm_news_url.txt"):
            with open("update_storm_news_url.txt", "r", encoding="utf-8") as f:
                url_list = f.read().split("\n")
            break
        else:
            time.sleep(120)

    if os.path.exists("update_for_view.txt"):
        view_update_url_list = url_list.copy()
        with open("update_for_view.txt", "r", encoding="utf-8") as f:
            old_view_list = f.read().split("\n")
        old_view_list.remove("")

Esempio n. 5

0

Mostra file

File: storm_getView_2.0.py Progetto: drbrhbym/ec2_news_crawler

            viewQueue.put({"id": "storm-" + tag_dict[news_tag] + "-" + artical_number,
                           "news_link": news_url,
                           "view": news_view,
                           "time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M")})
        except KeyError as e:
            lineNotify("Got KeyError: " + str(e))



        # 爲了突出效果，設定延時
        #time.sleep(1)

if __name__ == "__main__":

    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    lineNotify("Views of news started updating " + now)

    # 開啟要爬的新聞網址檔案
    while True:
        if os.path.exists("update_for_view.txt"):
            with open("update_for_view.txt", "r", encoding="utf-8") as f:
                url_list = f.read().split("\n")
            break
        else:
            time.sleep(120)

    # 紀錄爬蟲開始時間
    start_time = time.time()

    for url in url_list:
        if url == "":

Esempio n. 6

0

Mostra file

File: storm_getContent.py Progetto: drbrhbym/ec2_news_crawler

            # 將依照發布日期分類的新聞內容存檔
            with open("./newsfolder/" + date + "_storm_news.json",
                      "w",
                      encoding="utf-8") as f:
                json.dump(news_dict, f)

    # 紀錄存檔結束時間
    end_time = time.time()
    print('Done, Time cost: %s ' % (end_time - start_time))

    # 檢查用
    # print(len(news_list))
    # print(count)

    # 紀錄刪除檔案開始時間
    start_time = time.time()

    # 使用系統指令刪除檔案
    os.remove("update_storm_news_url.txt.bak")
    path = './tmpfolder/*'
    r = glob.glob(path)
    for i in r:
        os.remove(i)

    # 紀錄刪除檔案結束時間
    end_time = time.time()
    print('Done, Time cost: %s ' % (end_time - start_time))

    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    lineNotify("News content updated successfully " + now)