def ranking_today2():
    df = get_speadsheet_data()
    rank = 0
    ranking = "昨日の新型コロナウイルス感染者数ランキング\n"
    is_same = False
    current_num = -1
    yesterday = str((datetime.today() - timedelta(days=1)).date())
    for city, num in df.loc[yesterday].sort_values(
            ascending=False).to_dict().items():
        if current_num == num:
            # tie with the previous city: same rank, appended on the same line
            is_same = True
        else:
            ranking += "\n"
            rank += 1
            is_same = False
        if not is_same:
            if count_twitter(ranking) >= 230:
                # out of tweet budget: point to the full ranking page instead
                ranking += f"(以下 https://narumi-midori.net/twitter_aichi_covid19/{yesterday}.html)"
                break
            ranking += f"{rank}位 {num}人 {city}"
            current_num = num
        else:
            if count_twitter(ranking) >= 230:
                ranking += f"(以下 https://narumi-midori.net/twitter_aichi_covid19/{yesterday}.html)"
                break
            ranking += f", {city}"
    post(ranking)
    return ranking
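# Hypothetical sketch (not the repo's implementation): count_twitter is used
# above but defined elsewhere. Since ranking_week/ranking_today call
# twitter_text.parse_tweet(...).weightedLength directly, it is presumably a
# thin wrapper like this. Underscore-named to avoid shadowing the real helper.
def _count_twitter_sketch(text: str) -> int:
    from twitter_text import parse_tweet
    # weightedLength counts CJK characters as 2, matching Twitter's 280 budget
    return parse_tweet(text).weightedLength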
def post_nagoya():
    if os.path.isfile("nagoya_lock.zip"):
        # already posted today
        pass
    else:
        load_url = 'https://www.city.nagoya.jp/kenkofukushi/page/0000126920.html'
        html = requests.get(load_url)
        soup = BeautifulSoup(html.content, "html.parser")
        nagoya_h3 = soup.find("h3")
        # the <h3> reads like "令和3年M月D日現在"; pull out the M月D日 part
        date = datetime.strptime(
            nagoya_h3.text.split("令和3年")[1].split("現在")[0], '%m月%d日')
        # date = datetime.strptime(nagoya_h3.text.split("令和2年")
        #                          [1].split("現在")[0], '%m月%d日')
        today = datetime.today()
        is_today = (today.month == date.month) & (today.day == date.day)
        article_text = nagoya_h3.next_element.next_element.find("p").text
        article_url = load_url
        num_today = int(re.sub("\\D", "", article_text))
        # is_today = True
        if is_today:
            df0 = pandas.read_pickle("database.zip")
            num_last_week = get_number_by_delta(df0, -7, region="名古屋市")
            youbi = get_day_of_week_jp(today)
            header = f'[速報]名古屋市の本日の新型コロナウイルスの新規感染者数は{num_today}人(先週の{youbi}に比べて{num_today-num_last_week:+}人)でした。詳細は公式サイトを参照 > {article_url}'
            post(header)
            data_for_save = pandas.DataFrame([{
                '本日': num_today,
                '先週': num_last_week
            }], index=['名古屋市'])
            data_for_save.to_pickle("nagoya_lock.zip")
            time.sleep(5)
            print("名古屋市更新しました", datetime.today())
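# Hypothetical sketch of get_day_of_week_jp (defined elsewhere in the repo):
# the headline interpolates it as 先週の{youbi}, so it presumably maps a
# datetime to the Japanese weekday name. Underscore-named to avoid shadowing.
def _get_day_of_week_jp_sketch(dt) -> str:
    # datetime.weekday(): Monday == 0 ... Sunday == 6
    return ["月曜日", "火曜日", "水曜日", "木曜日",
            "金曜日", "土曜日", "日曜日"][dt.weekday()]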
def post() -> None:
    # local import: twitter_post.post shadows this function inside its own
    # scope, so the call below hits the Twitter client, not recursion
    from twitter_post import post
    medical = get_medical_number(get_medical_data())
    non_medical = get_non_medical_number(get_open_data())
    headline = generate_headline_first_second(medical, non_medical)
    last_number = get_last_total_number()
    current_number = extract_total_number(headline)
    if current_number > last_number:
        post(headline)
    else:
        print("The number is not greater than the last posted number.")
def post_toyohashi():
    if os.path.isfile("toyohashi_lock.zip"):
        # already posted today
        pass
    else:
        load_url = 'https://www.city.toyohashi.lg.jp/41805.htm'
        html = requests.get(load_url)
        soup = BeautifulSoup(html.content, "html.parser")
        toyohashi_new = soup.find(class_='Item_normal')
        # toyohashi_header = "豊橋市が新型コロナウイルス情報を更新しました > "
        article_text = toyohashi_new.text.replace("\n", "").replace("\xa0", "")
        article_url = load_url
        today = datetime.today()
        nums = re.findall(r"\d+", article_text)
        # normalize full-width digits to half-width before parsing
        nums = [
            _num.translate(
                str.maketrans(
                    {chr(0xFF01 + i): chr(0x21 + i) for i in range(94)}))
            for _num in nums
        ]
        date = datetime.strptime(f"{nums[1]}月{nums[2]}日", "%m月%d日")
        is_today = (today.month == date.month) & (today.day == date.day)
        # is_today = True
        if is_today:
            num_today = int(nums[3])
            df0 = pandas.read_pickle("database.zip")
            num_last_week = get_number_by_delta(df0, -7, region="豊橋市")
            youbi = get_day_of_week_jp(today)
            header = f'[速報]豊橋市の本日の新型コロナウイルスの新規感染者数は{num_today}人(先週の{youbi}に比べて{num_today-num_last_week:+}人)でした。詳細は公式サイトを参照 > {article_url}'
            post(header)
            data_for_save = pandas.DataFrame([{
                '本日': num_today,
                '先週': num_last_week
            }], index=['豊橋市'])
            data_for_save.to_pickle("toyohashi_lock.zip")
            time.sleep(5)
            print("豊橋市更新しました", datetime.today())
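# Demonstration of the full-width -> half-width normalization used above:
# chr(0xFF01)..chr(0xFF5E) are the full-width forms of chr(0x21)..chr(0x7E),
# so one translation table covers digits and ASCII punctuation alike.
def _demo_zen2han():
    zen2han = str.maketrans(
        {chr(0xFF01 + i): chr(0x21 + i) for i in range(94)})
    assert "12月3日".translate(zen2han) == "12月3日"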
def post_zentai():
    is_toyohashi_done = os.path.isfile("toyohashi_lock.zip")
    is_toyota_done = os.path.isfile("toyota_lock.zip")
    is_okazaki_done = os.path.isfile("okazaki_lock.zip")
    is_nagoya_done = os.path.isfile("nagoya_lock.zip")
    is_aichi_done = os.path.isfile("aichi_lock.zip")
    is_ichinomiya_done = os.path.isfile("ichinomiya_lock.zip")
    is_dones = [
        is_aichi_done, is_nagoya_done, is_okazaki_done, is_toyohashi_done,
        is_toyota_done, is_ichinomiya_done
    ]
    if all(is_dones) and not os.path.isfile("zentai.lock"):
        df_toyohashi = pandas.read_pickle("toyohashi_lock.zip")
        df_toyota = pandas.read_pickle("toyota_lock.zip")
        df_okazaki = pandas.read_pickle("okazaki_lock.zip")
        df_nagoya = pandas.read_pickle("nagoya_lock.zip")
        df_aichi = pandas.read_pickle("aichi_lock.zip")
        df_ichinomiya = pandas.read_pickle("ichinomiya_lock.zip")
        df_today = pandas.concat([
            df_toyohashi, df_aichi, df_nagoya, df_toyota, df_okazaki,
            df_ichinomiya
        ])
        num_today = df_today['本日'].sum()
        num_last_week = df_today['先週'].sum()
        youbi = get_day_of_week_jp(datetime.today() - timedelta(hours=6))
        article_url = 'https://www.pref.aichi.jp/site/covid19-aichi/'
        header = f'[速報]本日の愛知県全体の新型コロナウイルスの新規感染者数は{num_today}人(先週の{youbi}に比べて{num_today-num_last_week:+}人)でした。詳細は公式サイトを参照 > {article_url}'
        # print(header)
        post(header)
        df_today.to_pickle(
            os.path.join(
                "data",
                f"{str(datetime.today()-timedelta(hours=6)).split()[0]}_from_sum.zip"
            ))
        with open("zentai.lock", "w", encoding="utf-8") as f:
            f.write("")
        time.sleep(5)
        print("愛知県全体更新しました", datetime.today())
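# Illustration of the shape post_zentai depends on: every *_lock.zip pickle is
# a one-row DataFrame indexed by city with 本日/先週 columns, so concat + sum
# yields the prefecture-wide totals. Values here are made up.
def _demo_lock_sum():
    import pandas
    df_a = pandas.DataFrame([{'本日': 10, '先週': 7}], index=['名古屋市'])
    df_b = pandas.DataFrame([{'本日': 3, '先週': 5}], index=['豊橋市'])
    df_today = pandas.concat([df_a, df_b])
    assert (df_today['本日'].sum(), df_today['先週'].sum()) == (13, 12)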
def post_aichi():
    if os.path.isfile("aichi_lock.zip"):
        pass
    else:
        d_atom = feedparser.parse(
            'https://www.pref.aichi.jp/rss/10/site-758.xml')
        is_today = False
        today = datetime.today() - timedelta(hours=6)
        for entry in d_atom['entries']:
            _day = datetime.strptime(entry['updated'],
                                     "%Y-%m-%dT%H:%M:%S+09:00")
            # print(_day.month, today.month, _day.day, today.day,
            #       entry['title'], '新型コロナウイルス感染症患者の発生について')
            if (_day.month == today.month) and (_day.day == today.day) and (
                    entry['title'] == '新型コロナウイルス感染症患者の発生について'):
                article_url = entry['id']
                is_today = True
                break
        if is_today:
            load_url = article_url
            # load_url = 'https://www.pref.aichi.jp/site/covid19-aichi/pressrelease-ncov201208.html'
            html = requests.get(load_url)
            soup = BeautifulSoup(html.content, "html.parser")
            article_text = soup.find(class_="mol_textblock").text
            nums = re.findall(r"\d+", article_text)
            num_today = int(nums[0])
            df0 = pandas.read_pickle("database.zip")
            num_last_week = get_number_by_delta(df0, -7, region="愛知県")
            youbi = get_day_of_week_jp(today)
            header = f'[速報]愛知県管轄自治体(名古屋市・豊橋市・豊田市・岡崎市・一宮市を除く愛知県)の本日の新型コロナウイルスの新規感染者数は{num_today}人(先週の{youbi}に比べて{num_today-num_last_week:+}人)でした。詳細は公式サイトを参照 > {article_url}'
            post(header)
            data_for_save = pandas.DataFrame([{
                '本日': num_today,
                '先週': num_last_week
            }], index=['愛知県'])
            data_for_save.to_pickle("aichi_lock.zip")
            time.sleep(5)
            print("愛知県更新しました", datetime.today())
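# Hypothetical sketch of get_number_by_delta (defined elsewhere): the call
# sites pass the pickled case database, delta=-7, and a region, and expect the
# case count for that day back. The column names 発表日/住居地 are guessed
# from ranking_week below; the real schema may differ.
def _get_number_by_delta_sketch(df, delta: int, region: str) -> int:
    target = (datetime.today() + timedelta(days=delta)).date()
    hits = df[(df["発表日"].dt.date == target) & (df["住居地"] == region)]
    return len(hits)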
def ranking_week2():
    data = get_speadsheet_data()
    rank = 0
    ranking = "昨日まで直近1週間の新型コロナウイルス感染者数ランキング\n"
    is_same = False
    current_num = -1
    yesterday = str((datetime.today() - timedelta(days=1)).date())
    # build a trailing 7-day sum for every date, newest window first
    df = pandas.DataFrame([])
    indices = data.index.sort_values(ascending=False)
    for num in range(len(indices) - 6):
        df1 = data.loc[indices[num:num + 7], :].sum().to_frame().transpose()
        df1.index = [indices[num]]
        df = pandas.concat([df, df1])
    for city, num in df.loc[yesterday].sort_values(
            ascending=False).to_dict().items():
        if current_num == num:
            is_same = True
        else:
            ranking += "\n"
            rank += 1
            is_same = False
        if not is_same:
            if count_twitter(ranking) >= 230:
                ranking += f"(以下 https://narumi-midori.net/twitter_aichi_covid19/{yesterday}_week.html)"
                break
            ranking += f"{rank}位 {num}人 {city}"
            current_num = num
        else:
            if count_twitter(ranking) >= 230:
                ranking += f"(以下 https://narumi-midori.net/twitter_aichi_covid19/{yesterday}_week.html)"
                break
            ranking += f", {city}"
    post(ranking)
    return ranking
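# Design note: the window loop above can also be written with pandas' built-in
# trailing sum, assuming `data` is numeric with one row per date and an
# ascending index; dropna() removes the first six partial windows.
def _weekly_sums_alternative(data):
    return data.sort_index().rolling(7).sum().dropna()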
def ranking_week():
    if not os.path.isfile("ranking_week.lock"):
        this_mo = pandas.read_pickle("database.zip")
        # this_mo = pandas.read_pickle(f"{os.path.splitext(pdf_name)[0]}.zip")
        this_week = this_mo[this_mo["発表日"] >= datetime.today() -
                            timedelta(days=8) - timedelta(hours=6)]
        pd_week = pandas.DataFrame(
            collections.Counter(this_week["住居地"]).most_common())
        # normalize the variant kanji ⻄ to the standard 西
        pd_week[0] = [_.replace("⻄", "西") for _ in pd_week[0]]
        ranking_text = "昨日まで直近1週間の新型コロナウイルス感染者数ランキング\n"
        rank = 0
        num_prior = 0
        yesterday = datetime.today() - timedelta(days=1)
        for city, num in zip(pd_week[0], pd_week[1]):
            if num == num_prior:
                # if parse_tweet(ranking_text).weightedLength > 258:
                if parse_tweet(ranking_text).weightedLength > 223:
                    ranking_text += f"(以下 https://narumi-midori.net/twitter_aichi_covid19/{str(yesterday.date())}_week.html)"
                    break
                else:
                    ranking_text += f", {city}"
            else:
                # if parse_tweet(ranking_text).weightedLength > 252:
                if parse_tweet(ranking_text).weightedLength > 227:
                    ranking_text += f"(以下 https://narumi-midori.net/twitter_aichi_covid19/{str(yesterday.date())}_week.html)"
                    break
                else:
                    rank += 1
                    ranking_text += f"\n{rank}位 {num}人: {city}"
                    num_prior = num
        # print(ranking_text, parse_tweet(ranking_text).weightedLength)
        post(ranking_text)
        with open("ranking_week.lock", "w", encoding="utf-8") as f:
            f.write("")
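# What the Counter/most_common step yields: column 0 holds the city, column 1
# its 7-day count, already sorted descending. Toy data for illustration.
def _demo_most_common():
    import collections
    import pandas
    pd_week = pandas.DataFrame(
        collections.Counter(["A市", "B市", "A市"]).most_common())
    assert list(pd_week[0]) == ["A市", "B市"]
    assert list(pd_week[1]) == [2, 1]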
def post_toyota():
    # if os.path.isfile("toyota_lock.zip"):
    #     pass
    # else:
    load_url = 'https://www.city.toyota.aichi.jp/kurashi/kenkou/eisei/1039225.html'
    html = requests.get(load_url)
    soup = BeautifulSoup(html.content, "html.parser")
    toyota_new = soup.find(class_="objectlink")
    article_text = toyota_new.find("li").text
    article_url = urljoin(load_url, toyota_new.find("li").find('a')['href'])
    today = datetime.today()
    nums = re.findall(r"\d+", article_text)
    # print(nums)
    date = datetime.strptime(f"{nums[0]}月{nums[1]}日", "%m月%d日")
    is_today = (today.month == date.month) & (today.day == date.day)
    is_zero = False
    if not is_today:
        # fall back to the daily announcement page
        load_url2 = "https://www.city.toyota.aichi.jp/kurashi/kenkou/eisei/1037578.html"
        html = requests.get(load_url2)
        soup = BeautifulSoup(html.content, "html.parser")
        toyota_new = soup.find("h2")
        article_text = toyota_new.next_element.next_element.next_element.text
        nums = re.findall(r"\d+", article_text)
        is_zero = "いません" in article_text
        date = datetime.strptime(f"{nums[0]}月{nums[1]}日", "%m月%d日")
        is_today = (today.month == date.month) & (today.day == date.day)
    # ex1 = "市内在住者(3人)が新型コロナウイルスに感染したことが判明しました。(1248~1250例目)"
    if is_today:
        if len(nums) == 3:
            # date plus a single case number: exactly one new case
            num_today = 1
        elif is_zero:
            num_today = 0
        else:
            # a case-number range like (1248~1250例目): count its span
            num_today = int(nums[3]) - int(nums[2]) + 1
            # num_today = int(nums[2])
        df0 = pandas.read_pickle("database.zip")
        num_last_week = get_number_by_delta(df0, -7, region="豊田市")
        youbi = get_day_of_week_jp(today)
        if not os.path.isfile("toyota_lock.zip"):
            header = f'[速報]豊田市の本日の新型コロナウイルスの新規感染者数は{num_today}人(先週の{youbi}に比べて{num_today-num_last_week:+}人)でした。詳細は公式サイトを参照 > {article_url}'
        elif int(pandas.read_pickle("toyota_lock.zip").loc["豊田市", "本日"]) < num_today:
            header = f'[更新]豊田市の本日の新型コロナウイルスの新規感染者数は{num_today}人(先週の{youbi}に比べて{num_today-num_last_week:+}人)に更新されました。詳細は公式サイトを参照 > {article_url}'
        else:
            header = None
        if (header is not None) and (num_today > 0):
            data_for_save = pandas.DataFrame([{
                '本日': num_today,
                '先週': num_last_week
            }], index=['豊田市'])
            data_for_save.to_pickle("toyota_lock.zip")
            post(header)
            time.sleep(5)
            print("豊田市更新しました", datetime.today())
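# Worked example of the case-count arithmetic above, assuming the announcement
# text leads with the date (as the %m月%d日 parse implies): for a range like
# (1248~1250例目), nums[2:] are the range endpoints.
def _demo_toyota_count():
    import re
    text = "12月1日、市内在住者が新型コロナウイルスに感染したことが判明しました。(1248~1250例目)"
    nums = re.findall(r"\d+", text)  # ['12', '1', '1248', '1250']
    assert int(nums[3]) - int(nums[2]) + 1 == 3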
    if h3.find("a") is not None:
        article_url = urljoin(load_url, h3.find("a")['href'].replace("./", ""))
    else:
        article_url = load_url
    # print(article_url)
    youbi = get_day_of_week_jp(today)
    if not os.path.isfile("okazaki_lock.zip"):
        header = f'[速報]岡崎市の本日の新型コロナウイルスの新規感染者数は{num_today}人(先週の{youbi}に比べて{num_today-num_last_week:+}人)でした。詳細は公式サイトを参照 > {article_url}'
    elif int(pandas.read_pickle("okazaki_lock.zip").loc["岡崎市", "本日"]) < num_today:
        header = f'[更新]岡崎市の本日の新型コロナウイルスの新規感染者数は{num_today}人(先週の{youbi}に比べて{num_today-num_last_week:+}人)に更新されました。詳細は公式サイトを参照 > {article_url}'
    else:
        header = None
    if header is not None:
        post(header)
        # print(header)
        data_for_save = pandas.DataFrame([{
            '本日': num_today,
            '先週': num_last_week
        }], index=['岡崎市'])
        data_for_save.to_pickle("okazaki_lock.zip")
        time.sleep(5)
        print("岡崎市更新しました", datetime.today())
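# Side note: urljoin already resolves a leading "./" in a relative href, so
# the replace("./", "") above is defensive rather than strictly necessary.
def _demo_urljoin():
    from urllib.parse import urljoin
    assert (urljoin("https://example.com/a/b.html", "./c.html")
            == "https://example.com/a/c.html")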
    # compile then match
    repatter = re.compile(pattern)
    result = repatter.match(text)
    return int(result.group(1))


def get_last_total_number() -> int:
    from twitter_post import get_posts
    timelines = get_posts()
    text_line_text = get_last_post(timelines)
    return extract_total_number(text_line_text)


if __name__ == "__main__":
    post()
    # post2_vaccination()
    # medical = get_medical_number(get_medical_data())
    # non_medical = get_non_medical_number(get_open_data())
    # print(generate_headline_first_second(medical, non_medical))
    # a = get_vaccination_number_from_open_data_df("summary_by_prefecture.csv")
    # print(a["count_first_or_mid_general"])
    # print(a["count_second_or_full_general"])
    # print(get_medical_number("IRYO-kenbetsu-vaccination_data.xlsx"))
    # df_m = get_df("IRYO-kenbetsu-vaccination_data.xlsx", "医療従事者接種回数")
    # print(generate_headline())
    # print(get_open_data())
    # print(get_vaccination_number())
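# Hypothetical sketch of get_last_post (defined elsewhere): given whatever
# structure get_posts returns, it hands back the text of the most recent post
# for extract_total_number to parse. The newest-first list of dicts with a
# "text" key is an assumption. Underscore-named to avoid shadowing.
def _get_last_post_sketch(timelines) -> str:
    return timelines[0]["text"]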
def ranking_today():
    if not os.path.isfile("ranking_today.lock"):
        press_url = "https://www.pref.aichi.jp/site/covid19-aichi/index-2.html"
        html = requests.get(press_url)
        soup = BeautifulSoup(html.content, "html.parser")
        # Apply the timedelta before deploying
        # today = (datetime.today()).strftime('%Y年%-m月%-d日')
        today = (datetime.today() - timedelta(days=1) -
                 timedelta(hours=6)).strftime('%Y年%-m月%-d日')
        url_flake = ""
        for li in soup.find(class_="list_ccc").find_all("li"):
            if (today in li.text) & ("感染者の発生" in li.text) & (
                    "愛知県職員における" not in li.text):
                url_flake = li.find("a")["href"]
        if url_flake != "":
            today_url = urljoin(multi_dirname(press_url, 3), url_flake)
            html = requests.get(today_url)
            soup = BeautifulSoup(html.content, "html.parser")
            pdf_url = urljoin(
                multi_dirname(press_url, 3),
                soup.find(class_="detail_free").find("a")["href"])
            pdf_file_path = os.path.join(
                "data",
                f"{str(datetime.today()-timedelta(hours=6)).split()[0]}_aichi.pdf"
            )
            urlretrieve(pdf_url, pdf_file_path)
            # extract every table from the press-release PDF
            tbls = camelot.read_pdf(pdf_file_path, pages='1-end')
            dfs = []
            for table in tbls:
                df = table.df
                dfs.append(df)
            df_all = pandas.concat(dfs)
            df_all.columns = df_all.iloc[0, :]
            df_all = df_all[df_all["年代"] != "年代"]  # drop repeated header rows
            # Remove the timedelta before deploying
            # _name = str(datetime.today()).split()[0]
            # _name = str(datetime.today() - timedelta(days=1)).split()[0]
            # df_zentai = pandas.read_pickle(
            #     os.path.join("data", f"{_name}_from_sum.zip"))
            df_zentai = get_last_numbers_from_posts(get_posts(tweet_number=30),
                                                    day_before=1)
            df_zentai.pop("愛知県管轄")
            df_zentai = pandas.DataFrame.from_dict(df_zentai, orient="index")
            df_zentai.columns = ["本日"]
            aichi_kobetsu = pandas.DataFrame(
                collections.Counter(df_all["居住地"]).most_common())
            aichi_kobetsu = aichi_kobetsu.set_index(0)
            aichi_kobetsu.columns = ["本日"]
            aichi_total = pandas.concat([
                aichi_kobetsu, df_zentai[df_zentai.index != "愛知県"]
            ]).sort_values("本日", ascending=False)
            ranking_text = "昨日の新型コロナウイルス感染者数ランキング\n"
            rank = 0
            num_prior = 0
            yesterday = datetime.today() - timedelta(days=1)
            for city, num in zip(aichi_total.index, aichi_total["本日"]):
                if num == num_prior:
                    # if parse_tweet(ranking_text).weightedLength > 258:
                    if parse_tweet(ranking_text).weightedLength > 233:
                        ranking_text += f"(以下 https://narumi-midori.net/twitter_aichi_covid19/{str(yesterday.date())}.html)"
                        break
                    else:
                        ranking_text += f", {city}"
                else:
                    # if parse_tweet(ranking_text).weightedLength > 251:
                    if parse_tweet(ranking_text).weightedLength > 226:
                        ranking_text += f"(以下 https://narumi-midori.net/twitter_aichi_covid19/{str(yesterday.date())}.html)"
                        break
                    else:
                        rank += 1
                        ranking_text += f"\n{rank}位 {num}人: {city}"
                        num_prior = num
            # print(ranking_text)
            post(ranking_text)
            with open("ranking_today.lock", "w", encoding="utf-8") as f:
                f.write("")
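# Hypothetical sketch of multi_dirname (defined elsewhere): the call
# multi_dirname(press_url, 3) appears to strip three path components so that
# urljoin receives the bare host. os.path.dirname semantics on URL strings are
# an assumption. Underscore-named to avoid shadowing the real helper.
def _multi_dirname_sketch(path: str, n: int) -> str:
    import os
    for _ in range(n):
        path = os.path.dirname(path)
    return path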
def post_cities():
    from twitter_post import get_posts, post
    numbers_from_tweets = get_last_numbers_from_posts(get_posts())

    # Every city goes through the same scrape-compare-post cycle; only the info
    # getter (and, for 豊田市, the engine number) differs, so iterate instead of
    # repeating the block six times.
    targets = [
        ("岡崎市", get_okazaki_info, {}),
        ("豊橋市", get_toyohashi_info, {}),
        ("豊田市", get_toyota_info, {"engine_number": 2}),
        ("一宮市", get_ichinomiya_info, {}),
        ("名古屋市", get_nagoya_info, {}),
        ("愛知県管轄自治体(名古屋市・豊橋市・豊田市・岡崎市・一宮市を除く愛知県)",
         get_aichi_ken_info, {}),
    ]
    for city, getter, kwargs in targets:
        info = pre_post(city, getter, **kwargs)
        # post only when the scraped number exceeds the last tweeted one
        if (info["is_postable"]) & (
                numbers_from_tweets[info["city"]] < info["number_today"]):
            post(info["headline"])
        else:
            print(info["city"], info["is_postable"],
                  numbers_from_tweets[info["city"]], info["number_today"])

    time.sleep(20)
    numbers_from_tweets = get_last_numbers_from_posts(get_posts())
    info = pre_post_zentai(get_zentai_info, numbers_from_tweets)
    print("-------------全体-------------")
    if (info["is_postable"]) & (
            numbers_from_tweets[info["city"]] < info["number_today"]):
        post(info["headline"])
    else:
        print(info["city"], info["is_postable"],
              numbers_from_tweets[info["city"]], info["number_today"])
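# Hypothetical sketch of the contract pre_post appears to satisfy (the real
# implementation lives elsewhere): wrap a city's info getter and return the
# dict post_cities consumes. The keys come from the call sites; the getter
# signature and the engine_number pass-through are assumptions.
def _pre_post_sketch(city, get_info, engine_number=1):
    info = get_info(engine_number=engine_number)  # assumed signature
    info["city"] = city  # expected keys: is_postable, number_today, headline
    return info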