def extract_date_from_header(header: str) -> datetime:
    """Extract the date from header text such as '人口(H28.1.1時点)'.

    Newlines are stripped from the header, then the last bracketed segment
    is expected to look like ``<era initial><year>.<month>.<day>時点``.
    The era/year/month/day pieces are reassembled into
    ``<era><yy>年<mm>月<dd>日`` and parsed with Japanera using the
    ``%-a%-o年%m月%d日`` format. When several eras match, the candidate
    with the latest year is returned.

    Args:
        header (str): Text such as '人口(H28.1.1時点)'.

    Returns:
        datetime: The extracted date. ``None`` is returned (after printing
        an error message) when the header has no bracketed date text or
        when the text cannot be parsed into any date.
    """
    header = header.replace("\n", '')

    # Scan for bracketed segments once and reuse the result.
    bracketed = re.findall(r'[【((](.*?)[))】]', header)
    if not bracketed:
        print('Error: {0} has no date text.'.format(header))
        return None

    # Only the last bracketed segment is considered to carry the date.
    datesource = re.search(
        r'([^0-9元]*)([0-9元]*)\.(.*)\.(.*)時点', bracketed[-1])
    if not datesource:
        print('Error: {0} has no date text.'.format(header))
        return None

    era, year, month, day = datesource.groups()
    # '元' (first year of an era) is normalised to '1' before zero-padding.
    date_text = '{0}{1}年{2}月{3}日'.format(
        era,
        year.replace('元', '1').zfill(2),
        month.zfill(2),
        day.zfill(2))

    # The parser is only built once the input is known to contain a date.
    candidates = Japanera().strptime(date_text, "%-a%-o年%m月%d日")
    if not candidates:
        # Previously an empty result surfaced as an IndexError.
        print('Error: {0} could not be parsed as a date.'.format(header))
        return None

    # Stable sort by year, then take the last entry (the latest year).
    return sorted(candidates, key=lambda x: x.year)[-1]
def _preprocess_date(result):
    """
    Convert a date-like string into a ``date`` object.

    Parameters
    ----------
    result : str
        Execution result; expected to look like a date.

    Returns
    -------
    date
        ``result`` expressed as a ``date`` (obtained via ``.date()`` on the
        parsed value).

    Raises
    ------
    RuntimeError
        If the Western-year form cannot be parsed by ``datetime_parser``
        (chained from the ``ParserError``), or if Japanera returns no
        candidate for the era form.
    """
    # _td1 / _td2 are translation tables defined elsewhere in this module;
    # their contents are not visible here.
    result = result.translate(_td1)
    # Optional "<m>月" optionally followed by "<d>日".
    mmdd = r'((\d{1,2})月((\d{1,2})日)?)?'
    # Two non-digit characters (optionally preceded by "天平"), then a
    # 1-2 digit year or "元", then an optional "年".
    eraname = r'(天平)?[^\d]{2}((\d{1,2})|元)年?'
    # 1-4 digit year with an optional "年".
    years = r'(\d{1,4})年?'
    # Form 1: year first, optionally followed by a parenthesised era-year.
    mt = re.match(r'({})(\({}\))?'.format(years, eraname) + mmdd, result)
    if mt:
        # The index arguments select match groups; how they are combined is
        # decided by _from_mtgrp_to_str (defined elsewhere) — TODO confirm.
        result = _from_mtgrp_to_str(mt.groups(), 0, 1, 4, 6, 8, 7, 9).translate(_td2)
        try:
            # A trailing "-" is dropped before parsing.
            # NOTE(review): datetime_parser is presumably dateutil's parser
            # given the ParserError it raises — confirm against the imports.
            if result.endswith("-"):
                date = datetime_parser(result[:-1]).date()
            else:
                date = datetime_parser(result).date()
            return date
        except ParserError as e:
            print(e, file=sys.stderr)
            raise RuntimeError(
                "Cannot parser as date '{}'".format(result)) from e
    # Form 2: era-year first, optionally followed by a parenthesised year.
    mt = re.match(r'({})(\({}\))?'.format(eraname, years) + mmdd, result)
    if mt:
        result = _from_mtgrp_to_str(mt.groups(), 0, 3, 2, 6, 8, 7, 9)
    # Form 3: parenthesised era-year followed by the optional month/day.
    mt = re.match(r'\(({})\){}'.format(eraname, mmdd), result)
    if mt:
        result = _from_mtgrp_to_str(mt.groups(), 0, 3, 2, 4, 6, 5, 7)
    jn = Japanera()
    # Build the Japanera format from the parts present in the string:
    # era + year always, month/day only when "月"/"日" appear.
    fmt = "%-E%-kO年"
    fmt += "%m月%d日" if "月" in result and "日" in result else "%m月" if "月" in result else ""
    res = jn.strptime(result, fmt)
    if res:
        # Japanera returns a sequence of candidates; the first one is used.
        return res[0].date()
    else:
        raise RuntimeError("Cannot parse as date '{}' by '{}'".format(
            result, fmt))
def extract_date_from_title(title: str) -> datetime:
    """Extract a date object from a title string.

    The text between the first opening and the last closing parenthesis is
    searched for ``<era><year>年<month>月<day>日``. The pieces are
    zero-padded ('元' is treated as year 1) and parsed with Japanera using
    the ``%-E%-o年%m月%d日`` format.

    Args:
        title (str): Title string, like
            マイナンバーカード交付状況(令和2年6月1日現在)

    Returns:
        datetime: The first date Japanera produces for the extracted text.
        ``False`` is returned when the title has no parenthesised part,
        when that part contains no ``年/月/日`` date, or when Japanera
        yields no candidate.
    """
    match = re.search(r'[((](.*)[))]', title)
    if not match:
        return False

    datesource = re.search(
        r'([^0-9元]*)([0-9元]*)年(.*)月(.*)日', match.groups()[0])
    if not datesource:
        # Parentheses without a date used to raise AttributeError here;
        # report "no date" the same way as the branch above.
        return False

    era, year, month, day = datesource.groups()
    date_text = '{0}{1}年{2}月{3}日'.format(
        era,
        year.replace('元', '1').zfill(2),
        month.zfill(2),
        day.zfill(2))

    # The parser is only built once the input is known to contain a date.
    candidates = Japanera().strptime(date_text, "%-E%-o年%m月%d日")
    if not candidates:
        # Avoid an IndexError when no era matches the extracted text.
        return False
    return candidates[0]
def get_latest_info():
    """Fetch the date and Excel link of the most recent announcement.

    Downloads the past-announcements page, looks at every anchor matched by
    ``.detail_free>p>a`` whose text contains "Excel", parses the era-style
    date that precedes the first opening parenthesis of the anchor text,
    and keeps the anchor with the greatest date.

    Returns:
        tuple: ``(latest_date, latest_link)`` where ``latest_link`` has
        been joined against the page URL to make it absolute.
    """
    url = "http://www.pref.osaka.lg.jp/iryo/osakakansensho/happyo_kako.html"
    response = requests.get(url)
    # Let requests guess the charset from the body before decoding.
    response.encoding = response.apparent_encoding
    anchors = BeautifulSoup(response.text, "html.parser").select(
        ".detail_free>p>a")
    japanera = Japanera()

    # Find the newest entry among the Excel file links.
    latest_date = None
    latest_link = None
    for anchor in anchors:
        print(anchor.text)
        if "Excel" not in anchor.text:
            continue
        # The date is the part of the link text before the parenthesis.
        date_text = re.split("[((]", anchor.text)[0]
        published = japanera.strptime(date_text, "%-E%-O年%m月%d日")[0].date()
        print(published.isoformat())
        if latest_date is None or published > latest_date:
            latest_date = published
            latest_link = anchor.get("href")

    # Turn the href into an absolute URL.
    latest_link = urllib.parse.urljoin(url, latest_link)
    print("最新の日付:", latest_date)
    print("リンク:", latest_link)
    return latest_date, latest_link
class TestJapanera(unittest.TestCase):
    """Unit tests for Japanera era lookup, formatting and parsing.

    Also exercises the Era, EraDate and EraDateTime types. All tests share
    one class-level Japanera instance unless they build their own.
    """
    # Shared instance, created once when the class body is executed.
    japera = Japanera()

    # --- era lookup -------------------------------------------------------

    def test_before_era(self):
        # With False as the second argument, a date one day before the
        # first era start is expected to give None.
        actual = self.japera.era(date(645, 7, 19), False)
        self.assertEqual(None, actual)

    def test_before_era_chris(self):
        # With the default second argument the same date is expected to
        # give an era whose kanji is "西暦".
        actual = self.japera.era(date(645, 7, 19)).kanji
        self.assertEqual("西暦", actual)

    def test_start_of_era(self):
        # The start date itself belongs to the era.
        actual = self.japera.era(date(645, 7, 20))
        self.assertEqual(
            Era("大化", "Taika", date(645, 7, 20), date(650, 3, 25), "common"), actual)

    def test_end_of_era(self):
        # The end date is expected NOT to resolve to the same era.
        actual = self.japera.era(date(650, 3, 25))
        self.assertNotEqual(
            Era("大化", "Taika", date(645, 7, 20), date(650, 3, 25), "common"), actual)

    def test_has_no_name(self):
        # A period with no era name is represented with None names.
        actual = self.japera.era(date(654, 11, 27))
        self.assertEqual(
            Era(None, None, date(654, 11, 27), date(686, 8, 17), "common"), actual)

    def test_daikaku_era(self):
        actual = self.japera.daikaku_era(date(1336, 12, 19))
        self.assertEqual(
            Era("延元", "Engen", date(1336, 4, 19), date(1340, 6, 2), "daikakuji"), actual)

    def test_jimyouin_era(self):
        actual = self.japera.jimyouin_era(date(1336, 12, 19))
        self.assertEqual(
            Era("建武", "Kenmu", date(1334, 3, 13), date(1338, 10, 19), "jimyouin"), actual)

    def test_primary_jimyouin(self):
        # A Japanera built with "jimyouin" returns the jimyouin era from
        # the plain era() lookup.
        japera_c = Japanera("jimyouin")
        actual = japera_c.era(date(1336, 12, 19))
        self.assertEqual(
            Era("建武", "Kenmu", date(1334, 3, 13), date(1338, 10, 19), "jimyouin"), actual)

    # --- strftime on Era --------------------------------------------------
    # Per the expectations below: %-O renders the first year as "元",
    # %-o renders it as "01".

    def test_strftime_jimyouin_O_not_first_year(self):
        actual = self.japera.jimyouin_era(date(1336, 12, 19)).strftime(
            date(1336, 12, 19), "%-E%-O年%m月%d日")
        self.assertEqual("建武03年12月19日", actual)

    def test_strftime_jimyouin_O_first_year(self):
        actual = self.japera.jimyouin_era(date(1334, 6, 19)).strftime(
            date(1334, 6, 19), "%-E%-O年%m月%d日")
        self.assertEqual("建武元年06月19日", actual)

    def test_strftime_jimyouin_o_first_year(self):
        actual = self.japera.jimyouin_era(date(1334, 6, 19)).strftime(
            date(1334, 6, 19), "%-E%-o年%m月%d日")
        self.assertEqual("建武01年06月19日", actual)

    def test_strftime_future_o_first_year(self):
        # jimyouin_era() is used for a modern date and is expected to
        # format as 令和.
        actual = self.japera.jimyouin_era(date(2019, 5, 1)).strftime(
            date(2019, 5, 1), "%-E%-o年%m月%d日")
        self.assertEqual("令和01年05月01日", actual)

    def test_strftime_future_O_first_year(self):
        actual = self.japera.jimyouin_era(date(2019, 5, 1)).strftime(
            date(2019, 5, 1), "%-E%-O年%m月%d日")
        self.assertEqual("令和元年05月01日", actual)

    def test_strftime_future_far(self):
        actual = self.japera.jimyouin_era(date(2048, 5, 1)).strftime(
            date(2048, 5, 1), "%-E%-O年%m月%d日")
        self.assertEqual("令和30年05月01日", actual)

    def test_strftime_japera_dot_strftime(self):
        # Same formatting, called directly on the Japanera instance.
        actual = self.japera.strftime(date(2048, 5, 1), "%-E%-O年%m月%d日")
        self.assertEqual("令和30年05月01日", actual)

    # --- strftime / strptime round trips -----------------------------------

    def test_strftime_and_strptime_O(self):
        actual = self.japera.era(date(1500, 1, 1)).strptime(
            self.japera.era(date(1500, 1, 1)).strftime(date(1500, 1, 1), "%-E%-O年%m月%d日"),
            "%-E%-O年%m月%d日").date()
        self.assertEqual(date(1500, 1, 1), actual)

    def test_strftime_and_strptime_o(self):
        actual = self.japera.era(date(1500, 1, 1)).strptime(
            self.japera.era(date(1500, 1, 1)).strftime(date(1500, 1, 1), "%-E%-o年%m月%d日"),
            "%-E%-o年%m月%d日").date()
        self.assertEqual(date(1500, 1, 1), actual)

    def test_strptime_japanera(self):
        # Japanera.strptime is expected to return a list of datetimes.
        actual = self.japera.strptime("平成31年04月16日", "%-E%-O年%m月%d日")
        self.assertEqual([datetime(2019, 4, 16, 0, 0)], actual)

    # --- Era._in ----------------------------------------------------------
    # The era looked up for 729-09-07 is checked against three dates:
    # 729-09-06 and 749-01-06 are inside, 749-05-08 is not.

    def test_in_first_day(self):
        actual = self.japera.era(date(729, 9, 7))._in(date(729, 9, 6))
        self.assertTrue(actual)

    def test_in_middle(self):
        actual = self.japera.era(date(729, 9, 7))._in(date(749, 1, 6))
        self.assertTrue(actual)

    def test_in_last_day(self):
        actual = self.japera.era(date(729, 9, 7))._in(date(749, 5, 8))
        self.assertFalse(actual)

    # --- English name helpers -----------------------------------------------

    def test_english_chorten_vowel(self):
        actual = self.japera.era(date(729, 9, 7)).english_shorten_vowel
        self.assertEqual("Tempyo", actual)

    def test_english_head(self):
        actual = self.japera.era(date(729, 9, 7)).english_head
        self.assertEqual("T", actual)

    # --- Era.is_after -----------------------------------------------------
    # Only a date earlier than the era (500-01-01) is expected to give True.

    def test_is_after_real_after(self):
        actual = self.japera.era(date(729, 9, 7)).is_after(date(500, 1, 1))
        self.assertTrue(actual)

    def test_is_after_not_after_first_day(self):
        actual = self.japera.era(date(729, 9, 7)).is_after(date(729, 9, 6))
        self.assertFalse(actual)

    def test_is_after_not_after_middle(self):
        actual = self.japera.era(date(729, 9, 7)).is_after(date(735, 9, 6))
        self.assertFalse(actual)

    def test_is_after_not_after_last_day(self):
        actual = self.japera.era(date(729, 9, 7)).is_after(date(749, 5, 8))
        self.assertFalse(actual)

    def test_is_after_not_after_after_day(self):
        actual = self.japera.era(date(729, 9, 7)).is_after(date(750, 5, 8))
        self.assertFalse(actual)

    # --- Era.is_before ----------------------------------------------------
    # Only a date later than the era (750-05-08) is expected to give True;
    # the last-day date 749-05-08 still gives False.

    def test_is_before_not_before(self):
        actual = self.japera.era(date(729, 9, 7)).is_before(date(500, 1, 1))
        self.assertFalse(actual)

    def test_is_before_not_before_first_day(self):
        actual = self.japera.era(date(729, 9, 7)).is_before(date(729, 9, 6))
        self.assertFalse(actual)

    def test_is_before_not_before_middle(self):
        actual = self.japera.era(date(729, 9, 7)).is_before(date(735, 9, 6))
        self.assertFalse(actual)

    def test_is_before_not_before_last_day(self):
        actual = self.japera.era(date(729, 9, 7)).is_before(date(749, 5, 8))
        self.assertFalse(actual)

    def test_is_before_real_before_before_day(self):
        actual = self.japera.era(date(729, 9, 7)).is_before(date(750, 5, 8))
        self.assertTrue(actual)

    # --- era_match --------------------------------------------------------

    def test_match(self):
        # A date in 1370 is expected to match two eras at once.
        actual = self.japera.era_match(date(1370, 1, 1))
        self.assertEqual({"正平", "応安"}, set(map(lambda x: x.kanji, actual)))

    def test_match_func_given(self):
        # Custom key and comparison callables: match on english_head == "S".
        actual = self.japera.era_match("S", lambda x: x.english_head, lambda x, y: x == y)
        self.assertEqual({"S"}, set(map(lambda x: x.english_head, actual)))

    def test_check_all_english_head_lower(self):
        eras = self.japera.era_common_daikakuji + self.japera.era_common_jimyouin
        for era in eras:
            # NOTE(review): the bare except also swallows the AssertionError
            # raised by assertEqual, so this test can never fail as written.
            try:
                self.assertEqual(era.english_head, era.english_head.lower())
            except:
                pass

    # --- EraDate / EraDateTime ----------------------------------------------

    def test_eradate(self):
        # NOTE(review): this Era instance is constructed but never used.
        Era("天平", "Tempyou", date(729, 9, 6), date(749, 5, 8), "common")
        era = self.japera.era(date(730, 1, 1))
        test_eradate = EraDate.fromdate(date(730, 5, 2), era)
        self.assertEqual(test_eradate.era, era)

    def test_EraDate(self):
        # fromdate() without an explicit era; str() is "<kanji>-<ISO date>".
        era = EraDate.fromdate(date(2020, 1, 1))
        self.assertEqual(era.era.kanji, "令和")
        self.assertEqual(str(era), "令和-2020-01-01")

    def test_EraDateTime(self):
        era = EraDateTime.fromdatetime(datetime(2019, 1, 1, 1, 1))
        self.assertEqual(era.era.kanji, "平成")
        self.assertEqual(str(era), "平成-2019-01-01 01:01:00")

    def test_EraDatetime_strftime(self):
        # Despite the name, this formats an EraDate; it checks the
        # concatenated output of six era directives.
        actual = EraDate.fromdate(date(749, 5, 8)).strftime("%-E%-e%-A%-a%-o%-O")
        self.assertEqual(actual, "天平感宝TempyokampoTempyouKampouT01元")

    # --- kanji-numeral directives (%-kO, %-km, %-kd) --------------------------

    def test_japanera_strptime(self):
        actual = self.japera.strptime("平成三十一年四月十九日", "%-E%-kO年%-km月%-kd日")
        self.assertEqual(actual, [datetime(2019, 4, 19)])

    def test_japanera_strftime(self):
        actual = self.japera.strftime(datetime(2019, 4, 19), "%-E%-kO年%-km月%-kd日")
        self.assertEqual(actual, "平成三十一年四月十九日")

    def test_eradate_strftime(self):
        actual = EraDate(2019, 4, 19).strftime("%-E%-kO年%-km月%-kd日")
        self.assertEqual(actual, "平成三十一年四月十九日")

    # --- era name combined with the Western year (%Y) -------------------------

    def test_strftime_yyyy(self):
        actual = EraDate(2002, 4, 19).strftime("%-E%Y年%-km月%-kd日")
        self.assertEqual(actual, "平成2002年四月十九日")

    def test_strptime_yyyy(self):
        actual = self.japera.strptime("平成2002年四月十九日", "%-E%Y年%-km月%-kd日")
        self.assertEqual(actual, [datetime(2002, 4, 19)])

    # --- leap days --------------------------------------------------------

    def test_leap_kanji(self):
        actual = self.japera.strptime("令和02年02月29日", "%-E%-O年%m月%d日")
        self.assertEqual(actual, [datetime(2020, 2, 29)])

    def test_leap_chris(self):
        actual = self.japera.strptime("西暦2004年02月29日", "%-E%Y年%m月%d日")
        self.assertEqual(actual, [datetime(2004, 2, 29)])
def test_primary_jimyouin(self):
    """Check that a Japanera built with "jimyouin" resolves 1336-12-19 to 建武."""
    expected = Era(
        "建武", "Kenmu", date(1334, 3, 13), date(1338, 10, 19), "jimyouin")
    jimyouin_first = Japanera("jimyouin")
    self.assertEqual(expected, jimyouin_first.era(date(1336, 12, 19)))
import datetime

from japanera import Japanera

# Module-level Japanera instance.
# NOTE(review): it is not referenced in the part of the script visible here.
japera = Japanera()

# Sample session transcript kept as a bare string literal:
# "[out]" marks program output and "[input]" marks what the user typed.
""" [out] Input the date you want to know how to say in Japanese! [out] format: [yyyy-mm-dd] [input] 0001-01-01 [out] We don't have japanese era for given date... [out] Input the date you want to know how to say in Japanese! [out] format: [yyyy-mm-dd] [input] 1200-01-01 [out] ----- 正治02年01月01日 ----- [out] Input the date you want to know how to say in Japanese! [out] format: [yyyy-mm-dd] [input] 1199-12-12 [out] ----- 正治元年12月12日 ----- [out] Input the date you want to know how to say in Japanese! [out] format: [yyyy-mm-dd] [input] 1199-01-01 [out] ----- 建久10年01月01日 ----- [out] Input the date you want to know how to say in Japanese! [out] format: [yyyy-mm-dd] """

# Prompt loop.
# NOTE(review): only the prompt is printed in the visible loop body; the
# input handling shown in the transcript above is not part of this excerpt.
while True:
    print(
        "Input the date you want to know how to say in Japanese!\nformat: [yyyy-mm-dd]"
    )
from datetime import date from japanera import (Japanera, EraDate, EraDateTime) janera = Japanera() G_dic = {'1': '明治', '2': '大正', '3': '昭和', '4': '平成', '5': '令和'} def gengo_helper(string): if len(string) == 5: G = string[0] YY = string[1:3] MM = string[3:5] gengo = G_dic[G] gengo_month = f'{gengo}{YY}年{MM}月' d = janera.strptime(gengo_month, "%-E%-o年%m月") western_month = f'{d[0].year}{d[0].month:02d}' return western_month elif len(string) == 7: G = string[0] YY = string[1:3] MM = string[3:5] DD = string[5:7] gengo = G_dic[G] gengo_date = f'{gengo}{YY}年{MM}月{DD}日' d = janera.strptime(gengo_date, "%-E%-o年%m月%d日") western_date = f'{d[0].year}{d[0].month:02d}{d[0].day:02d}' return western_date else: