def process_data_for_myanmar(self):
     try:
         a = WikipediaService(url="https://en.wikipedia.org/wiki/2020_coronavirus_pandemic_in_Myanmar")
         country = "Myanmar"
         table_text = "Confirmed COVID-19 cases by Township"
         start_row = "3"
         table = a.search_table(table_text, index=1)
         if start_row.isnumeric():
             table = table[int(start_row) - 1:]
         res = []
         for row in table:
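             # Build the region name from the row's two name cells; the
             # aggregate "Total" row is skipped below.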
             region = self.get_city_name(row[1], row[2])
             if 'Total' in region:
                 continue
             infected = row[3]
             deaths = row[-1]
             d = dict(
                 region=region,
                 infected=sanitize_digit(infected),
                 deaths=sanitize_digit(deaths),
                 recoveries="0",
             )
             res.append(d)
         file_name = self.get_output_file(country)
         cleanup(file_name)
         self.write_output_for_country(res, country=country, file_name=file_name)
         move_to_final(file_name)
     except Exception as e:
         print(f"Exception fetching myanmar data: {e}")
Example #2
 def scrape_taiwan_data(self):
     print("Starting taiwan scrape")
     country = "Taiwan"
     r = requests.get(
         "https://services7.arcgis.com/HYXUMO0l0lNCIifa/arcgis/rest/services"
         "/Wuhan_Coronavirus_Taiwan_County/FeatureServer/0/query?f=json&where=Confirmed%3E0"
         "&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=Confirmed"
         "%20desc&resultOffset=0&resultRecordCount=25&cacheHint=true")
     response = r.json()
     cleanup(taiwan_csv)
     for feature in response["features"]:
         attributes = feature["attributes"]
         city = attributes["COUNTYENG"]
         infected = attributes["Confirmed"]
         recovered = attributes["recover"]
         deaths = attributes["death"]
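         # returnGeometry=false in the query above, so coordinates come from
         # the local GeoJSON lookup instead of the feature service.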
         latitude, longitude = self.geojson_service.get_lat_long(
             country, city)
         self.write_to_file(country=country,
                            region=city,
                            infected=infected,
                            recoveries=recovered,
                            deaths=deaths,
                            long=longitude,
                            lat=latitude,
                            filename=taiwan_csv)
     move_to_final(taiwan_csv)
 def process_data_for_malaysia(self):
     try:
         a = WikipediaService(url="https://en.m.wikipedia.org/wiki/2020_coronavirus_pandemic_in_Malaysia")
         country = "Malaysia"
         table_text = "District/City"
         start_row = "3"
         table = a.search_table(table_text)
         if start_row.isnumeric():
             table = table[int(start_row) - 1:]
         res = []
         for row in table:
             region = self.get_city_name(row[0], row[1])
             if 'Unknown' in region or 'Imported' in region:
                 continue
             infected = row[-1]
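             # Only cumulative cases are scraped per district; deaths and
             # recoveries are written as "0".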
             d = dict(
                 region=region,
                 infected=infected,
                 deaths="0",
                 recoveries="0",
             )
             res.append(d)
         file_name = self.get_output_file(country)
         cleanup(file_name)
         self.write_output_for_country(res, country=country, file_name=file_name)
         move_to_final(file_name)
     except Exception as e:
         print(f"Exception fetching malaysia data: {e}")
Example #4
 def scrape_john_hopkins_data(self, date_str=None):
     print("Starting john hopkins scrape")
     excluded_countries = ["China"]
     countries = ["Australia", "Canada", "China"]
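     # China stays in the downloaded/returned DataFrame but its rows are not
     # written here (presumably covered by a dedicated scraper).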
     curr_date = pendulum.now()
     processed = False
     if not date_str:
         date_str = curr_date.strftime("%m-%d-%Y")
     while not processed:
         print(f"Trying {date_str}...")
         r = requests.get(
             "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data"
             f"/csse_covid_19_daily_reports/{date_str}.csv")
         if r.status_code == 200:
             processed = True
         else:
             print(
                 f"Error! Status Code: {r.status_code}. Data still not updated."
             )
             # Today's daily report may not be published yet; step back one
             # day and retry rather than aborting with an unwritten file.
             curr_date = curr_date.subtract(days=1)
             date_str = curr_date.strftime("%m-%d-%Y")
     req = StringIO(r.text)
     df = pd.read_csv(req).replace(np.nan, '', regex=True)
     df = df[df["Country_Region"].isin(countries)]
     cleanup(hopkins_csv)
     for index, row in df.iterrows():
         country = row["Country_Region"]
         if country in excluded_countries:
             continue
         city = row["Admin2"]
         region = row["Province_State"] if not city else f'{city}, {row["Province_State"]}'
         confirmed = row["Confirmed"]
         deaths = row["Deaths"]
         recoveries = row["Recovered"]
         active = row["Active"]
         latitude = row["Lat"]
         longitude = row["Long_"]
         if "Unassigned" in region:
             continue
         self.write_to_file(country=country,
                            region=region,
                            infected=confirmed,
                            deaths=deaths,
                            recoveries=recoveries,
                            long=longitude,
                            lat=latitude,
                            filename=hopkins_csv)
     move_to_final(hopkins_csv)
     return df
 def process_data_for_usa(self):
     try:
         wms = WorldoMeterService()
         country = "USA"
         records = wms.get_us_stats()
         file_name = self.get_output_file(country)
         cleanup(file_name)
         self.write_output_for_country(records, country=country, file_name=file_name)
         move_to_final(file_name)
     except Exception as e:
         print(f"Exception fetching usa data: {e}")
 def process_global_stats(self):
     try:
         wms = WorldoMeterService()
         country = "global"
         records = wms.get_global_stats()
         file_name = self.get_output_file(country)
         cleanup(file_name)
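         # Keep the global snapshot on the instance for later use.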
         self.global_stats = records
         self.write_output_for_country(records, file_name=file_name)
         move_to_final(file_name)
     except Exception as e:
         print(f"Exception fetching global data: {e}")
 def process_data_for_philippines(self):
     a = WikipediaService(url="https://en.m.wikipedia.org/wiki/2020_coronavirus_pandemic_in_the_Philippines")
     country = "Philippines"
     table_name = "Confirmed COVID-19 cases in the Philippines by region"
     start_row = "3"
     end_row = "-3"
     region_col = "1"
     infected_col = "2"
     death_col = "3"
     recovered_col = "7"
     table_index = "1"
     z = a.process_table(table_name, start_row, end_row, region_col, infected_col, death_col, recovered_col,
                         table_index=table_index)
     res = []
     for rec in z:
         if "validation" not in rec.get("region"):
             res.append(rec)
     file_name = self.get_output_file(country)
     cleanup(file_name)
     self.write_output_for_country(res, country=country, file_name=file_name)
     move_to_final(file_name)
Example #8
 def scrape_japan_data(self):
     print("Starting japan scrape")
     country = "Japan"
     r = requests.get('https://data.covid19japan.com/summary/latest.json')
     response = r.json()
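     # covid19japan.com publishes per-prefecture totals in one JSON summary.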
     prefectures = response["prefectures"]
     cleanup(japan_csv)
     for prefecture in prefectures:
         region = prefecture["name"]
         confirmed = prefecture["confirmed"]
         deaths = prefecture["deaths"]
         recoveries = prefecture["recovered"]
         latitude, longitude = self.geojson_service.get_lat_long(
             country, region)
         self.write_to_file(country=country,
                            region=region,
                            infected=confirmed,
                            deaths=deaths,
                            recoveries=recoveries,
                            long=longitude,
                            lat=latitude,
                            filename=japan_csv)
     move_to_final(japan_csv)
 def process_data_for_thailand(self):
     try:
         a = WikipediaService(url="https://en.m.wikipedia.org/wiki/2020_coronavirus_pandemic_in_Thailand")
         country = "Thailand"
         table_name = "Confirmed COVID-19 cases"
         start_row = "3"
         end_row = "-2"
         region_col = "2"
         infected_col = "3"
         death_col = "11"
         recovered_col = ""
         table_index = "2"
         z = a.process_table(table_name, start_row, end_row, region_col, infected_col, death_col, recovered_col,
                             table_index=table_index)
         res = []
         for rec in z:
             if "Total" not in rec.get("region"):
                 res.append(rec)
         file_name = self.get_output_file(country)
         cleanup(file_name)
         self.write_output_for_country(res, country=country, file_name=file_name)
         move_to_final(file_name)
     except Exception as e:
         print(f"Exception fetching thailand data: {e}")
Example #10
 def scrape_philippines_data(self):
     s = Scraper(url="https://covid19stats.ph/stats/by-location")
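     # Locate the stats table via a cell that should always exist ("Quezon"),
     # then flatten it to a list of rows.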
     table = s.find_table_with_text("Quezon")
     lst = s.table_to_2d_list(table)
     if not lst:
         return []
     country = "Philippines"
     cleanup(philip_csv)
     for row in lst[1:]:
         city = self.get_city_name(row[0])
         infected = row[1]
         recovered = row[3]
         deaths = row[5]
         latitude, longitude = self.geojson_service.get_lat_long(
             country, city)
         self.write_to_file(country=country,
                            region=city,
                            infected=infected,
                            recoveries=recovered,
                            deaths=deaths,
                            long=longitude,
                            lat=latitude,
                            filename=philip_csv)
     move_to_final(philip_csv)
 def process_input(self, data):
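     # Rebuild the shared "wikipedia" output file from scratch on every run.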
     cleanup(self.get_output_file("wikipedia"))
     for row in data:
         self.process_row(row)
     move_to_final(self.get_output_file("wikipedia"))
Example #12
 def copy_static_files(self):
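     # China data is maintained as a static file; just promote it to final.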
     move_to_final(china_csv)