def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-1', action='store_true',
                        help='Markup for medical cases chart')
    parser.add_argument('-2', action='store_true',
                        help='Markup for medical cases table')
    parser.add_argument('-3', action='store_true',
                        help='Markup for charts')
    args = vars(parser.parse_args())
    if not any((args['1'], args['2'], args['3'])):
        parser.print_help()
        sys.exit(1)
    if args['1']:
        wiki1()
    if args['2']:
        wiki2()
    if args['3']:
        wiki3()
    log.log('Done')
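# Usage sketch for main() above; the module name `wiki.py` is hypothetical,
# used only for illustration:
#
#   $ python wiki.py -1      # write wiki1.txt and wiki1.diff
#   $ python wiki.py -2 -3   # flags can be combined
#   $ python wiki.py         # no flags: print help, exit with status 1
#
# argparse stores the option '-1' under dest '1', which is why the parsed
# namespace is read through vars(parser.parse_args()); attribute access
# such as `args.1` would be a syntax error.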
def load_home_data():
    """Return data retrieved from MoHFW website."""
    data = Data()

    # Save the response from MoHFW as a list of lines.
    url = 'https://www.mohfw.gov.in/'
    log.log('Connecting to {} ...', url)
    response = urllib.request.urlopen(url).read().decode('utf-8')
    lines = [l.strip() for l in response.splitlines()]
    lines = [l for l in lines if l != '']

    # Parsers.
    strong_re = re.compile(r'.*<strong.*?>([0-9]*).*<')
    time_re = re.compile(r'.*as on\s*:\s*(\d.*) IST')
    td_re = re.compile(r'.*<td>([^#]*).*</td>')
    parser_state = 'DEFAULT'

    # Parse the response.
    for i, line in enumerate(lines):
        if data.active == -1 and 'Active' in line:
            data.active = int(strong_re.match(lines[i + 1]).group(1))
        elif data.cured == -1 and 'Discharged' in line:
            data.cured = int(strong_re.match(lines[i + 1]).group(1))
        elif data.death == -1 and 'Deaths' in line:
            data.death = int(strong_re.match(lines[i + 1]).group(1))
        elif data.ref_datetime is None and 'as on' in line:
            t = time_re.match(line).group(1)
            data.ref_datetime = datetime.datetime.strptime(
                t, '%d %B %Y, %H:%M')
            data.ref_date = data.ref_datetime.strftime('%Y-%m-%d')
            data.ref_time = data.ref_datetime.strftime('%H:%M')

    data.total = data.active + data.cured + data.death
    return data
def compile_sass(*, last_build_time):
    sass_files = build_target.glob('**/*.sass')
    if all(not was_modified(path, since=last_build_time)
           for path in sass_files):
        log("Sass not modified since last build")
    else:
        with log_section("Compiling sass", multiline=False):
            shell_exec(f'cd "{build_target}" && sass --quiet --update .:.')
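# `was_modified` is used above but not defined in this excerpt; a minimal
# sketch, assuming `since` is a POSIX timestamp (float) and comparing it
# against the file's mtime:

import os

def was_modified(path, *, since):
    """Return True if the file at `path` changed after `since`."""
    return os.path.getmtime(path) > since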
def fetch_wiki_source(article_name):
    """Return wikitext from the specified Wikipedia article."""
    src_url = ('https://en.wikipedia.org/w/index.php?title={}&action=edit'
               .format(article_name))
    log.log('Fetching wikitext for {} ...', src_url)
    response = urllib.request.urlopen(src_url).read().decode('utf-8')
    source = re.search(r'(?s)<textarea .*?>(.*)</textarea>',
                       response).group(1)
    source = html.unescape(source)
    return source
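# Usage sketch (the article name below is only an example):
#
#   source = fetch_wiki_source('COVID-19_pandemic_in_India')
#
# The (?s) flag lets '.' match newlines, so the regex captures the entire
# multi-line wikitext inside the edit page's <textarea>; html.unescape()
# then turns entities like &lt; back into literal characters.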
def wiki1():
    """Generate Wikipedia markup code for medical cases chart template."""
    ignore_dates = ('2020-02-04', '2020-02-27')
    data = archive.load(ignore_dates=ignore_dates)
    update = source = fetch_wiki_source(WIKI_SRC1)
    update = replace_within('Total confirmed -->\n', '\n<!-- Date',
                            update, wiki1_data(data))
    log.log('Writing wiki1.txt and wiki1.diff ...')
    open('wiki1.txt', 'w').write(update)
    open('wiki1.diff', 'w').write(diff(source, update))
def wiki2():
    """Generate Wikipedia markup for region table."""
    data = mohfw.load_region_data()
    update = source = fetch_wiki_source(WIKI_SRC2)
    update = replace_within(
        '\\|- class="sorttop"\n', '\n\\|- class="sortbottom"', update,
        region_table_head(data) + '\n' + region_table_body(data))
    log.log('Writing wiki2.txt and wiki2.diff ...')
    open('wiki2.txt', 'w').write(update)
    open('wiki2.diff', 'w').write(diff(source, update))
def find_items(directory: Path):
    """
    Find all files in a directory that end in '.fm'; parse them and
    return the parsed items.
    """
    items = []
    with log_section("Looking for items"):
        for file_loc in directory.glob('**/*.fm'):
            log(f"found '{file_loc}'")
            items.append(parse_item(Path(file_loc)))
        log(f"found {len(items)} items")
    return items
def main():
    """Render the home page."""
    # Copy static files.
    if os.path.isdir('_site'):
        shutil.rmtree('_site')
    shutil.copytree('static', '_site')
    shutil.copy('indiacovid19.json', '_site')

    # Load COVID-19 archive data.
    log.log('Loading archive ...')
    data = archive.load()
    log.log('Found entries for {} days', len(data.dates))

    # Format placeholder values.
    last_updated = data.last_ref_datetimes[-1].strftime('%d %b %Y %H:%M IST')
    new_growth = '{:+.0f}%'.format(data.total_growths[-1])
    doubling_time = '{:.1f}'.format(data.doubling_times[-1])
    cured_percent = '{:.0f}%'.format(data.cured_percents[-1])
    death_percent = '{:.0f}%'.format(data.death_percents[-1])
    cured_ratio = '{:.1f}'.format(data.cured_ratios[-1])
    img_max_width = round(len(data.dates) * 100 / 40)

    # Render home page.
    log.log('Rendering home page ...')
    layout = fread('layout/index.html')
    output = render(layout,
                    last_total=data.total_cases[-1],
                    last_active=data.active_cases[-1],
                    last_cured=data.cured_cases[-1],
                    last_death=data.death_cases[-1],
                    last_date=data.dates[-1],
                    last_updated=last_updated,
                    new_cases=data.total_diffs[-1],
                    new_growth=new_growth,
                    doubling_time=doubling_time,
                    cured_percent=cured_percent,
                    death_percent=death_percent,
                    cured_ratio=cured_ratio,
                    case_links=case_links(data),
                    case_rows=case_rows(data))
    fwrite('_site/index.html', output)

    # Render CSS.
    log.log('Rendering stylesheet ...')
    layout = fread('layout/main.css')
    output = render(layout, img_max_width=img_max_width)
    fwrite('_site/main.css', output)

    # Plot graphs.
    plot.plot_all(data)
    log.log('Done')
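# fread, fwrite, and render are helpers assumed from elsewhere in the
# project; a minimal sketch of the behavior main() relies on. The
# '{{ key }}' placeholder syntax is an assumption, not confirmed by this
# excerpt:

def fread(filepath):
    """Read and return the text content of a file."""
    with open(filepath, 'r') as f:
        return f.read()

def fwrite(filepath, text):
    """Write text content to a file."""
    with open(filepath, 'w') as f:
        f.write(text)

def render(template, **params):
    """Substitute each '{{ key }}' placeholder with its value."""
    for key, val in params.items():
        template = template.replace('{{ ' + key + ' }}', str(val))
    return template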
def medical_cases():
    """Generate Wikipedia markup for medical cases template."""
    home_data = mohfw.load_home_data()
    dash_data = mohfw.load_dash_data()
    if home_data.ref_datetime >= dash_data.ref_datetime:
        data = home_data
        log.log('Selected home page data')
    else:
        data = dash_data
        data.foreign = home_data.foreign
        log.log('Selected dashboard data')
    output = open('layout/medical_cases.txt').read()
    output = region_table_rows(data, output)
    output = region_table_foot(data, output)
    ignore_dates = ('2020-02-04', '2020-02-27')
    data = archive.load(ignore_dates=ignore_dates)
    output = medical_cases_plots(data, output)
    return output
def replace_within(begin_re, end_re, source, data):
    """Replace text in source between two delimiters with specified data."""
    pattern = r'(?s)(' + begin_re + r')(?:.*?)(' + end_re + r')'
    source = re.sub(pattern, r'\1@@REPL@@\2', source)
    if '@@REPL@@' in source:
        source = source.replace('@@REPL@@', data)
    else:
        log.log('')
        log.log('ERROR: Cannot match {!r} and {!r}'.format(begin_re, end_re))
        log.log('')
    return source
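# A quick self-contained demo of replace_within (values illustrative):
#
#   >>> replace_within('BEGIN\n', '\nEND', 'BEGIN\nold\nEND', 'new')
#   'BEGIN\nnew\nEND'
#
# The non-greedy (?:.*?) between the delimiters keeps the match from
# spanning past the first end delimiter, and @@REPL@@ acts as a sentinel
# so a failed match is reported instead of silently ignored.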
def load_home_data():
    """Return data retrieved from MoHFW website."""
    data = Data()

    # Save the response from MoHFW as a list of lines.
    url = 'https://www.mohfw.gov.in/'
    log.log('Connecting to {} ...', url)
    response = urllib.request.urlopen(url).read().decode('utf-8')
    lines = [l.strip() for l in response.splitlines()]
    lines = [l for l in lines if l != '']

    # Parsers.
    strong_re = re.compile(r'.*<strong>(.*)</strong>')
    time_re = re.compile(r'.*as on\s*:\s*(\d.*) IST')
    foreign_re = re.compile(r'.*[Ii]ncluding (\d+)? ?[Ff]oreign')
    td_re = re.compile(r'.*<td>([^#]*).*</td>')
    parser_state = 'DEFAULT'

    # Parse the response.
    for i, line in enumerate(lines):
        if data.active == -1 and 'Active Cases' in line:
            data.active = int(strong_re.match(lines[i - 1]).group(1))
        elif data.cured == -1 and 'Cured' in line:
            data.cured = int(strong_re.match(lines[i - 1]).group(1))
        elif data.death == -1 and 'Deaths' in line:
            data.death = int(strong_re.match(lines[i - 1]).group(1))
        elif data.migrated == -1 and 'Migrated' in line:
            data.migrated = int(strong_re.match(lines[i - 1]).group(1))
        elif data.ref_datetime is None and 'as on' in line:
            t = time_re.match(line).group(1)
            data.ref_datetime = datetime.datetime.strptime(
                t, '%d %B %Y, %H:%M')
            data.ref_date = data.ref_datetime.strftime('%Y-%m-%d')
            data.ref_time = data.ref_datetime.strftime('%H:%M')
        elif data.foreign == -1 and 'foreign' in line:
            n = foreign_re.match(line).group(1)
            if n is not None:
                data.foreign = int(n)
        elif '<tbody>' in line:
            parser_state = 'REGION'
        elif parser_state == 'REGION' and '<tr>' in line:
            if 'Total' in lines[i + 2]:
                parser_state = 'REGION_TOTAL'
                continue
            # Parse.
            region_name = td_re.match(lines[i + 2]).group(1)
            active = td_re.match(lines[i + 3]).group(1)
            cured = td_re.match(lines[i + 4]).group(1)
            death = td_re.match(lines[i + 5]).group(1)
            total = td_re.match(lines[i + 6]).group(1)
            # Normalize.
            if region_name.startswith('Cases being reassigned'):
                region_name = 'reassigned'
            total = int(total) if total else -1
            cured = int(cured) if cured else -1
            death = int(death) if death else -1
            active = int(active) if active else -1
            # Save.
            data.regions[region_name] = (total, active, cured, death)
        elif parser_state == 'REGION_TOTAL' and 'Total' in line:
            parser_state = 'DEFAULT'
            data.regions_active = int(strong_re.match(lines[i + 1]).group(1))
            data.regions_cured = int(strong_re.match(lines[i + 3]).group(1))
            data.regions_death = int(strong_re.match(lines[i + 6]).group(1))
            data.regions_total = int(strong_re.match(lines[i + 9]).group(1))

    data.total = data.active + data.cured + data.death + data.migrated

    # Validations.
    if data.total != data.regions_total:
        log.log('home page: Mismatch in total and regions_total')
    if data.active != data.regions_active:
        log.log('home page: Mismatch in active and regions_active')
    if data.cured + data.migrated != data.regions_cured:
        log.log('home page: Mismatch in cured + migrated and regions_cured')
    if data.death != data.regions_death:
        log.log('home page: Mismatch in death and regions_death')

    return data
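# Illustrative (not verbatim) shape of the home page table markup that the
# REGION branch above walks; the offsets i+2 .. i+6 index the <td> lines
# that follow each '<tr>' line:
#
#   <tr>
#   <td>1</td>         <- serial number (skipped)
#   <td>Kerala</td>    <- lines[i + 2]: region name
#   <td>999</td>       <- lines[i + 3]: active
#   <td>999</td>       <- lines[i + 4]: cured
#   <td>9</td>         <- lines[i + 5]: death
#   <td>999</td>       <- lines[i + 6]: total
#   </tr>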
def load_dash_data():
    """Return data retrieved from MoHFW dashboard page."""
    data = Data()

    # Retrieve MoHFW dashboard HTML.
    url = 'https://www.mohfw.gov.in/index.php'
    log.log('Connecting to {} ...', url)
    response = urllib.request.urlopen(url).read().decode('utf-8')
    lines = [l.strip() for l in response.splitlines()]
    lines = [l for l in lines if l != '']

    # Parsers.
    strong_re = re.compile(r'.*<strong>(.*)</strong>')
    time_re = re.compile(r'.*as on\s*:\s*(\d.*) IST')
    js_re = re.compile(r"\['(.*)', (.*), (.*), (.*)\],")
    parser_state = 'DEFAULT'

    # Parse the response.
    for i, line in enumerate(lines):
        if data.active == -1 and 'Active Cases' in line:
            data.active = int(strong_re.match(lines[i - 1]).group(1))
        elif data.cured == -1 and 'Cured' in line:
            data.cured = int(strong_re.match(lines[i - 1]).group(1))
        elif data.death == -1 and 'Deaths' in line:
            data.death = int(strong_re.match(lines[i - 1]).group(1))
        elif data.migrated == -1 and 'Migrated' in line:
            data.migrated = int(strong_re.match(lines[i - 1]).group(1))
        elif data.ref_datetime is None and 'as on' in line:
            t = time_re.match(line).group(1)
            data.ref_datetime = datetime.datetime.strptime(
                t, '%d %B %Y, %H:%M')
            data.ref_date = data.ref_datetime.strftime('%Y-%m-%d')
            data.ref_time = data.ref_datetime.strftime('%H:%M')
        elif 'Hover' in line:
            break

    data.total = data.active + data.cured + data.death + data.migrated

    # Retrieve MoHFW JSON data.
    url = 'https://www.mohfw.gov.in/data/data.json'
    log.log('Connecting to {} ...', url)
    items = json.load(urllib.request.urlopen(url))

    # Parse the response.
    for item in items:
        region_name = item['state_name']
        total = int(item['positive'])
        cured = int(item['cured'])
        death = int(item['death'])
        active = total - cured - death
        data.regions[region_name] = (total, active, cured, death)

    # Region totals.
    data.regions_total = sum(v[0] for v in data.regions.values())
    data.regions_active = sum(v[1] for v in data.regions.values())
    data.regions_cured = sum(v[2] for v in data.regions.values())
    data.regions_death = sum(v[3] for v in data.regions.values())

    # Validations.
    if data.total != data.regions_total:
        log.log('dashboard: Mismatch in total and regions_total')
    if data.active != data.regions_active:
        log.log('dashboard: Mismatch in active and regions_active')
    if data.cured + data.migrated != data.regions_cured:
        log.log('dashboard: Mismatch in cured + migrated and regions_cured')
    if data.death != data.regions_death:
        log.log('dashboard: Mismatch in death and regions_death')

    return data
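# Illustrative shape of the data.json items consumed above (the field
# names come from the code; the values are made up):
#
#   [
#     {"state_name": "Kerala", "positive": "999", "cured": "999",
#      "death": "9"},
#     ...
#   ]
#
# Note that this feed carries no active count; it is derived above as
# total - cured - death.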
def plot_recent_wide(data):
    """Plot only the recent graphs, with an approximately 16:9 aspect
    ratio.
    """
    log.log('Rendering total-cases-linear-recent plot ...')
    plot_total_cases_linear(data, recent=True, aspect='wide')
    log.log('Rendering total-cases-log-recent plot ...')
    plot_total_cases_log(data, recent=True, aspect='wide')
    log.log('Rendering new-cases-recent plot ...')
    plot_new_cases(data, recent=True, aspect='wide')
    log.log('Rendering growth-percents-recent plot ...')
    plot_growth_percents(data, recent=True, aspect='wide')
    log.log('Rendering doubling-times-recent plot ...')
    plot_doubling_times(data, recent=True, aspect='wide')
    log.log('Rendering cured-percents-recent plot ...')
    plot_cured_percents(data, recent=True, aspect='wide')
    log.log('Rendering cured-ratios-recent plot ...')
    plot_cured_ratios(data, recent=True, aspect='wide')
def plot_all(data):
    """Plot all graphs."""
    log.log('Rendering total-cases-linear-recent plot ...')
    plot_total_cases_linear(data, recent=True, aspect='square')
    log.log('Rendering total-cases-linear plot ...')
    plot_total_cases_linear(data, recent=False, aspect=None)
    log.log('Rendering total-cases-log-recent plot ...')
    plot_total_cases_log(data, recent=True, aspect='square')
    log.log('Rendering total-cases-log plot ...')
    plot_total_cases_log(data, recent=False, aspect=None)
    log.log('Rendering new-cases-recent plot ...')
    plot_new_cases(data, recent=True, aspect='square')
    log.log('Rendering new-cases plot ...')
    plot_new_cases(data, recent=False, aspect=None)
    log.log('Rendering growth-percents-recent plot ...')
    plot_growth_percents(data, recent=True, aspect='square')
    log.log('Rendering growth-percents plot ...')
    plot_growth_percents(data, recent=False, aspect=None)
    log.log('Rendering doubling-times-recent plot ...')
    plot_doubling_times(data, recent=True, aspect='square')
    log.log('Rendering doubling-times plot ...')
    plot_doubling_times(data, recent=False, aspect=None)
    log.log('Rendering cured-percents-recent plot ...')
    plot_cured_percents(data, recent=True, aspect='square')
    log.log('Rendering cured-percents plot ...')
    plot_cured_percents(data, recent=False, aspect=None)
    log.log('Rendering cured-ratios-recent plot ...')
    plot_cured_ratios(data, recent=True, aspect='square')
    log.log('Rendering cured-ratios plot ...')
    plot_cured_ratios(data, recent=False, aspect=None)
def wiki3():
    """Generate Wikipedia markup code for statistics charts."""
    ignore_dates = ('2020-02-04', '2020-02-27')
    data = archive.load(ignore_dates=ignore_dates)
    update = source = fetch_wiki_source(WIKI_SRC3)
    full_dates = ', '.join(x.strftime('%Y-%m-%d') for x in data.datetimes)

    # Cases.
    total_cases = ', '.join(str(y) for y in data.total_cases)
    active_cases = ', '.join(str(y) for y in data.active_cases)
    cured_cases = ', '.join(str(y) for y in data.cured_cases)
    death_cases = ', '.join(str(y) for y in data.death_cases)

    # New cases.
    total_dates, total_diffs, total_avgs = \
        format_diffs(*expand_diffs(data.datetimes, data.total_diffs))
    cured_dates, cured_diffs, cured_avgs = \
        format_diffs(*expand_diffs(data.datetimes, data.cured_diffs))
    death_dates, death_diffs, death_avgs = \
        format_diffs(*trim_zeros(*expand_diffs(data.datetimes,
                                               data.death_diffs)))

    # Daily new cases vs. active cases.
    vs_dates, vs_percents, vs_avgs, vs_cagrs = \
        vs_data(data.datetimes, data.total_diffs, data.active_cases)

    # CFR.
    cfr_start = data.dates.index('2020-03-12')
    cfr_dates = ', '.join(
        x.strftime('%Y-%m-%d') for x in data.datetimes[cfr_start:])
    cfr_percents = ', '.join('{:.2f}'.format(y)
                             for y in data.cfr_percents[cfr_start:])

    # For testing regex matches only.
    """
    full_dates = '@@full_dates@@'
    total_cases = '@@total_cases@@'
    active_cases = '@@active_cases@@'
    cured_cases = '@@cured_cases@@'
    death_cases = '@@death_cases@@'
    total_dates = '@@total_dates@@'
    total_diffs = '@@total_diffs@@'
    total_avgs = '@@total_avgs@@'
    cured_dates = '@@cured_dates@@'
    cured_diffs = '@@cured_diffs@@'
    cured_avgs = '@@cured_avgs@@'
    death_dates = '@@death_dates@@'
    death_diffs = '@@death_diffs@@'
    death_avgs = '@@death_avgs@@'
    vs_dates = '@@vs_dates@@'
    vs_percents = '@@vs_percents@@'
    vs_avgs = '@@vs_avgs@@'
    vs_cagrs = '@@vs_cagrs@@'
    cfr_dates, cfr_percents = '@@cfr_dates@@', '@@cfr_percents@@'
    """

    # Linear graph.
    update = replace_within('= Total confirmed .*?=.*? x = ', '\n',
                            update, full_dates)
    update = replace_within('= Total confirmed .*?=.*? y1 =.*?--> ', '\n',
                            update, total_cases)
    update = replace_within('= Total confirmed .*?=.*? y2 =.*?--> ', '\n',
                            update, active_cases)
    update = replace_within('= Total confirmed .*?=.*? y3 =.*?--> ', '\n',
                            update, cured_cases)
    update = replace_within('= Total confirmed .*?=.*? y4 =.*?--> ', '\n',
                            update, death_cases)

    # Logarithmic graph.
    update = replace_within('= Total confirmed .*?=.*?log.*? x = ', '\n',
                            update, full_dates)
    update = replace_within('= Total confirmed .*?=.*?log.*? y1 =.*?--> ',
                            '\n', update, total_cases)
    update = replace_within('= Total confirmed .*?=.*?log.*? y2 =.*?--> ',
                            '\n', update, active_cases)
    update = replace_within('= Total confirmed .*?=.*?log.*? y3 =.*?--> ',
                            '\n', update, cured_cases)
    update = replace_within('= Total confirmed .*?=.*?log.*? y4 =.*?--> ',
                            '\n', update, death_cases)

    # Daily new cases.
    update = replace_within('= Daily new cases =.*? x = ', '\n',
                            update, total_dates)
    update = replace_within('= Daily new cases =.*? y1 =.*?--> ', '\n',
                            update, total_diffs)
    update = replace_within('= Daily new cases =.*? y2 =.*?--> ', '\n',
                            update, total_avgs)

    # Daily new deaths.
    update = replace_within('= Daily new deaths =.*? x = ', '\n',
                            update, death_dates)
    update = replace_within('= Daily new deaths =.*? y1 =.*?--> ', '\n',
                            update, death_diffs)
    update = replace_within('= Daily new deaths =.*? y2 =.*?--> ', '\n',
                            update, death_avgs)

    # Daily new recoveries.
    update = replace_within('= Daily new recoveries =.*? x = ', '\n',
                            update, cured_dates)
    update = replace_within('= Daily new recoveries =.*? y1 =.*?--> ', '\n',
                            update, cured_diffs)
    update = replace_within('= Daily new recoveries =.*? y2 =.*?--> ', '\n',
                            update, cured_avgs)

    # Daily new cases vs. active cases.
    update = replace_within('= Daily new cases vs active cases =.*? x = ',
                            '\n', update, vs_dates)
    update = replace_within(
        '= Daily new cases vs active cases =.*? y1 =.*?--> ', '\n',
        update, vs_percents)
    update = replace_within(
        '= Daily new cases vs active cases =.*? y2 =.*?--> ', '\n',
        update, vs_avgs)
    update = replace_within(
        '= Daily new cases vs active cases =.*? y3 =.*?--> ', '\n',
        update, vs_cagrs)

    # CFR.
    update = replace_within('= Case fatality rate =.*? x = ', '\n',
                            update, cfr_dates)
    update = replace_within('= Case fatality rate =.*? y = ', '\n',
                            update, cfr_percents)

    log.log('Writing wiki3.txt and wiki3.diff ...')
    open('wiki3.txt', 'w').write(update)
    open('wiki3.diff', 'w').write(diff(source, update))
def load_region_data(home_data=None):
    """Return data retrieved from MoHFW data JSON."""
    data = Data()

    # Retrieve MoHFW JSON data.
    url = 'https://www.mohfw.gov.in/data/datanew.json'
    log.log('Connecting to {} ...', url)
    items = json.load(urllib.request.urlopen(url))

    # Parse the response.
    for item in items:
        region_name = item['state_name']
        total = int(item['new_positive'])
        active = int(item['new_active'])
        cured = int(item['new_cured'])
        death = int(item['new_death'])
        if region_name == '':
            data.region_total = total
            data.region_active = active
            data.region_cured = cured
            data.region_death = death
        else:
            if total != (active + cured + death):
                log.log('WARN: region: Total mismatch for {}', region_name)
            data.regions[region_name] = (total, active, cured, death)

    # Region totals.
    region_total_sum = sum(v[0] for v in data.regions.values())
    region_active_sum = sum(v[1] for v in data.regions.values())
    region_cured_sum = sum(v[2] for v in data.regions.values())
    region_death_sum = sum(v[3] for v in data.regions.values())

    # Validations.
    if home_data and data.region_total != home_data.total:
        log.log('WARN: region: Mismatch in region total and home total')
    if home_data and data.region_active != home_data.active:
        log.log('WARN: region: Mismatch in region active and home active')
    if home_data and data.region_cured != home_data.cured:
        log.log('WARN: region: Mismatch in region cured and home cured')
    if home_data and data.region_death != home_data.death:
        log.log('WARN: region: Mismatch in region death and home death')
    if data.region_total != region_total_sum:
        log.log('WARN: region: Mismatch in region total and calculated sum')
    if data.region_active != region_active_sum:
        log.log('WARN: region: Mismatch in region active and calculated sum')
    if data.region_cured != region_cured_sum:
        log.log('WARN: region: Mismatch in region cured and calculated sum')
    if data.region_death != region_death_sum:
        log.log('WARN: region: Mismatch in region death and calculated sum')

    return data
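# Usage sketch: cross-check the region feed against the home page scrape
# (both loaders appear earlier in this excerpt; the region name is only
# an example):
#
#   home = load_home_data()
#   region = load_region_data(home_data=home)
#   print(region.regions.get('Kerala'))   # (total, active, cured, death)
#
# Passing home_data enables the home-vs-region validations; calling
# load_region_data() with no argument skips them and only checks the
# per-region sums.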