def process_data_for_myanmar(self):
    try:
        a = WikipediaService(
            url="https://en.wikipedia.org/wiki/2020_coronavirus_pandemic_in_Myanmar")
        country = "Myanmar"
        table_text = "Confirmed COVID-19 cases by Township"
        start_row = "3"
        table = a.search_table(table_text, index=1)
        if start_row.isnumeric():
            table = table[int(start_row) - 1:]  # skip header rows
        res = []
        for row in table:
            region = self.get_city_name(row[1], row[2])
            if 'Total' in region:  # skip the summary row
                continue
            infected = row[3]
            deaths = row[-1]
            d = dict(
                region=region,
                infected=sanitize_digit(infected),
                deaths=sanitize_digit(deaths),
                recoveries="0",
            )
            res.append(d)
        file_name = self.get_output_file(country)
        cleanup(file_name)
        self.write_output_for_country(res, country=country, file_name=file_name)
        move_to_final(file_name)
    except Exception as e:
        print(f"Exception fetching myanmar data: {e}")
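# `sanitize_digit` is defined elsewhere in the repo; the Wikipedia cells it
# cleans typically carry thousands separators and footnote markers. A minimal
# sketch of the assumed behaviour (a hypothetical stand-in, not the repo's
# actual implementation):
import re

def sanitize_digit_sketch(value):
    """Hypothetical stand-in for sanitize_digit: strip footnote brackets,
    separators, and whitespace, returning a plain digit string ("0" if empty)."""
    cleaned = re.sub(r"\[.*?\]", "", str(value))  # drop footnote markers like "[a]"
    cleaned = cleaned.replace(",", "").strip()    # drop thousands separators
    return cleaned if cleaned.isdigit() else "0"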
def scrape_taiwan_data(self):
    print("Starting taiwan scrape")
    country = "Taiwan"
    r = requests.get(
        "https://services7.arcgis.com/HYXUMO0l0lNCIifa/arcgis/rest/services"
        "/Wuhan_Coronavirus_Taiwan_County/FeatureServer/0/query?f=json&where=Confirmed%3E0"
        "&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=Confirmed"
        "%20desc&resultOffset=0&resultRecordCount=25&cacheHint=true")
    response = r.json()
    cleanup(taiwan_csv)
    for feature in response["features"]:
        attributes = feature["attributes"]
        city = attributes["COUNTYENG"]
        infected = attributes["Confirmed"]
        recovered = attributes["recover"]
        deaths = attributes["death"]
        latitude, longitude = self.geojson_service.get_lat_long(country, city)
        self.write_to_file(country=country,
                           region=city,
                           infected=infected,
                           recoveries=recovered,
                           deaths=deaths,
                           long=longitude,
                           lat=latitude,
                           filename=taiwan_csv)
    move_to_final(taiwan_csv)
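# The ArcGIS query above is hand-encoded into the URL, which is easy to get
# wrong. An equivalent request built with requests' `params` argument (same
# endpoint and fields; shown only as an alternative sketch, not what the
# scraper currently does):
TAIWAN_ARCGIS_URL = (
    "https://services7.arcgis.com/HYXUMO0l0lNCIifa/arcgis/rest/services"
    "/Wuhan_Coronavirus_Taiwan_County/FeatureServer/0/query")

def fetch_taiwan_features_sketch():
    params = {
        "f": "json",
        "where": "Confirmed>0",
        "returnGeometry": "false",
        "spatialRel": "esriSpatialRelIntersects",
        "outFields": "*",
        "orderByFields": "Confirmed desc",
        "resultOffset": 0,
        "resultRecordCount": 25,
        "cacheHint": "true",
    }
    r = requests.get(TAIWAN_ARCGIS_URL, params=params)  # requests URL-encodes the values
    return r.json()["features"]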
def process_data_for_malaysia(self):
    try:
        a = WikipediaService(
            url="https://en.m.wikipedia.org/wiki/2020_coronavirus_pandemic_in_Malaysia")
        country = "Malaysia"
        table_text = "District/City"
        start_row = "3"
        table = a.search_table(table_text)
        if start_row.isnumeric():
            table = table[int(start_row) - 1:]  # skip header rows
        res = []
        for row in table:
            region = self.get_city_name(row[0], row[1])
            if 'Unknown' in region or 'Imported' in region:  # rows with no mappable region
                continue
            infected = row[-1]
            d = dict(
                region=region,
                infected=infected,
                deaths="0",
                recoveries="0",
            )
            res.append(d)
        file_name = self.get_output_file(country)
        cleanup(file_name)
        self.write_output_for_country(res, country=country, file_name=file_name)
        move_to_final(file_name)
    except Exception as e:
        print(f"Exception fetching malaysia data: {e}")
def scrape_john_hopkins_data(self, date_str=None):
    print("Starting Johns Hopkins scrape")
    # China rows are skipped below; its data ships via the static china_csv
    # (see copy_static_files).
    excluded_countries = ["China"]
    countries = ["Australia", "Canada", "China"]
    curr_date = pendulum.now()
    processed = False
    if not date_str:
        date_str = curr_date.strftime("%m-%d-%Y")
    attempts = 0
    while not processed:
        print(f"Trying {date_str}...")
        r = requests.get(
            "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data"
            f"/csse_covid_19_daily_reports/{date_str}.csv")
        if r.status_code == 200:
            processed = True
        else:
            print(f"Error! Status Code: {r.status_code}. Data still not updated.")
            # Fall back one day and retry. The previous version returned here,
            # so the loop never actually retried; the cap below is an added
            # guard against a permanently missing feed.
            curr_date = curr_date.subtract(days=1)
            date_str = curr_date.strftime("%m-%d-%Y")
            attempts += 1
            if attempts > 7:
                print("No daily report found in the last week; aborting.")
                return
    req = StringIO(r.text)
    df = pd.read_csv(req).replace(np.nan, '', regex=True)
    df = df[df["Country_Region"].isin(countries)]
    cleanup(hopkins_csv)
    for index, row in df.iterrows():
        country = row["Country_Region"]
        if country in excluded_countries:
            continue
        city = row["Admin2"]
        region = row["Province_State"] if not city else f'{city}, {row["Province_State"]}'
        if "Unassigned" in region:
            continue
        confirmed = row["Confirmed"]
        deaths = row["Deaths"]
        recoveries = row["Recovered"]
        latitude = row["Lat"]
        longitude = row["Long_"]
        self.write_to_file(country=country,
                           region=region,
                           infected=confirmed,
                           deaths=deaths,
                           recoveries=recoveries,
                           long=longitude,
                           lat=latitude,
                           filename=hopkins_csv)
    move_to_final(hopkins_csv)
    return df
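# The retry loop above can be factored into a standalone helper. A sketch,
# assuming the same Johns Hopkins raw-GitHub layout; `max_lookback` mirrors
# the added safety cap rather than anything in the original method:
def find_latest_daily_report_sketch(date_str=None, max_lookback=7):
    """Return (date_str, response) for the newest available daily report,
    stepping back one day at a time, or (None, None) if none is found."""
    curr_date = pendulum.now()
    if not date_str:
        date_str = curr_date.strftime("%m-%d-%Y")
    for _ in range(max_lookback):
        r = requests.get(
            "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master"
            f"/csse_covid_19_data/csse_covid_19_daily_reports/{date_str}.csv")
        if r.status_code == 200:
            return date_str, r
        curr_date = curr_date.subtract(days=1)
        date_str = curr_date.strftime("%m-%d-%Y")
    return None, None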
def process_data_for_usa(self):
    try:
        wms = WorldoMeterService()
        country = "USA"
        records = wms.get_us_stats()
        file_name = self.get_output_file(country)
        cleanup(file_name)
        self.write_output_for_country(records, country=country, file_name=file_name)
        move_to_final(file_name)
    except Exception as e:
        print(f"Exception fetching usa data: {e}")
def process_global_stats(self):
    try:
        wms = WorldoMeterService()
        country = "global"
        records = wms.get_global_stats()
        file_name = self.get_output_file(country)
        cleanup(file_name)
        self.global_stats = records
        self.write_output_for_country(records, file_name=file_name)
        move_to_final(file_name)
    except Exception as e:
        print(f"Exception fetching global data: {e}")
def process_data_for_philippines(self):
    a = WikipediaService(
        url="https://en.m.wikipedia.org/wiki/2020_coronavirus_pandemic_in_the_Philippines")
    country = "Philippines"
    table_name = "Confirmed COVID-19 cases in the Philippines by region"
    start_row = "3"
    end_row = "-3"
    region_col = "1"
    infected_col = "2"
    death_col = "3"
    recovered_col = "7"
    table_index = "1"
    z = a.process_table(table_name, start_row, end_row, region_col,
                        infected_col, death_col, recovered_col,
                        table_index=table_index)
    res = []
    for rec in z:
        # skip "for validation" rows that are not real regions
        if "validation" not in rec.get("region", ""):
            res.append(rec)
    file_name = self.get_output_file(country)
    cleanup(file_name)
    self.write_output_for_country(res, country=country, file_name=file_name)
    move_to_final(file_name)
def scrape_japan_data(self):
    print("Starting japan scrape")
    country = "Japan"
    r = requests.get('https://data.covid19japan.com/summary/latest.json')
    response = r.json()
    prefectures = response["prefectures"]
    cleanup(japan_csv)
    for prefecture in prefectures:
        region = prefecture["name"]
        confirmed = prefecture["confirmed"]
        deaths = prefecture["deaths"]
        recoveries = prefecture["recovered"]
        latitude, longitude = self.geojson_service.get_lat_long(country, region)
        self.write_to_file(country=country,
                           region=region,
                           infected=confirmed,
                           deaths=deaths,
                           recoveries=recoveries,
                           long=longitude,
                           lat=latitude,
                           filename=japan_csv)
    move_to_final(japan_csv)
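# Several scrapers resolve coordinates via self.geojson_service.get_lat_long.
# The service is defined elsewhere in the repo; a minimal sketch of the
# assumed lookup, with a hypothetical mapping file of
# {country: {region: [lat, long]}} (both the path and the file layout are
# assumptions for illustration):
import json

class GeoJsonServiceSketch:
    def __init__(self, mapping_path="data/region_coordinates.json"):  # hypothetical path
        with open(mapping_path) as f:
            self.coords = json.load(f)

    def get_lat_long(self, country, region):
        lat, long = self.coords.get(country, {}).get(region, (None, None))
        return lat, long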
def process_data_for_thailand(self):
    try:
        a = WikipediaService(
            url="https://en.m.wikipedia.org/wiki/2020_coronavirus_pandemic_in_Thailand")
        country = "Thailand"
        table_name = "Confirmed COVID-19 cases"
        start_row = "3"
        end_row = "-2"
        region_col = "2"
        infected_col = "3"
        death_col = "11"
        recovered_col = ""  # this table has no recoveries column
        table_index = "2"
        z = a.process_table(table_name, start_row, end_row, region_col,
                            infected_col, death_col, recovered_col,
                            table_index=table_index)
        res = []
        for rec in z:
            if "Total" not in rec.get("region", ""):  # skip the summary row
                res.append(rec)
        file_name = self.get_output_file(country)
        cleanup(file_name)
        self.write_output_for_country(res, country=country, file_name=file_name)
        move_to_final(file_name)
    except Exception as e:
        print(f"Exception fetching thailand data: {e}")
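# Both Wikipedia-table processors above pass 1-based row/column indices as
# strings, with "" meaning the column is absent. A sketch of the column
# mapping that WikipediaService.process_table is assumed to perform
# (illustrative only; the real implementation lives in WikipediaService):
def rows_to_records_sketch(rows, region_col, infected_col, death_col, recovered_col):
    def cell(row, col):
        # "" -> field not present in this table, report "0"
        return sanitize_digit(row[int(col) - 1]) if col else "0"
    return [
        dict(region=row[int(region_col) - 1],
             infected=cell(row, infected_col),
             deaths=cell(row, death_col),
             recoveries=cell(row, recovered_col))
        for row in rows
    ]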
def scrape_philippines_data(self):
    s = Scraper(url="https://covid19stats.ph/stats/by-location")
    table = s.find_table_with_text("Quezon")
    lst = s.table_to_2d_list(table)
    if not lst:
        return []
    country = "Philippines"
    cleanup(philip_csv)
    for row in lst[1:]:  # skip the header row
        city = self.get_city_name(row[0])
        infected = row[1]
        recovered = row[3]
        deaths = row[5]
        latitude, longitude = self.geojson_service.get_lat_long(country, city)
        self.write_to_file(country=country,
                           region=city,
                           infected=infected,
                           recoveries=recovered,
                           deaths=deaths,
                           long=longitude,
                           lat=latitude,
                           filename=philip_csv)
    move_to_final(philip_csv)
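# Scraper.find_table_with_text / table_to_2d_list are assumed to be thin
# wrappers over an HTML parser such as BeautifulSoup. A sketch of the
# flattening step, assuming `table` is a bs4 Tag (an illustration of the
# contract, not the repo's actual helper):
def table_to_2d_list_sketch(table):
    """Flatten an HTML <table> element into a list of rows of cell strings."""
    return [
        [cell.get_text(strip=True) for cell in tr.find_all(["td", "th"])]
        for tr in table.find_all("tr")
    ]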
def process_input(self, data):
    cleanup(self.get_output_file("wikipedia"))
    for row in data:
        self.process_row(row)
    move_to_final(self.get_output_file("wikipedia"))
def copy_static_files(self):
    move_to_final(china_csv)