def load2mysql(self):
    """Populate Province/City/County tables from the JSON file at self.path.

    The file is expected to hold a list of province dicts, each with a
    "name", a "city" list (dicts with "name" and "area"), and each area
    being a plain county-name string.
    """
    with open(self.path) as fp:
        province_records = json.load(fp)

    for province_record in province_records:
        saved_province = Province(name=province_record["name"])
        saved_province.save()

        for city_record in province_record["city"]:
            saved_city = City(name=city_record["name"], province=saved_province)
            saved_city.save()

            for county_name in city_record["area"]:
                County(name=county_name, city=saved_city).save()
def load_provinces():
    """Load unique province rows from the tambon spreadsheet into the DB.

    Reads data/tambon.xlsx, derives the province name from the
    "CHANGWAT_T" column (the text after the first dot, left-stripped),
    and inserts one Province per distinct CH_ID, committing once at the
    end.
    """
    data = read_excel('data/tambon.xlsx')
    # CHANGWAT_T values look like "<prefix>. <name>"; keep the name part.
    data['changwat'] = \
        data['CHANGWAT_T'].str.split('.').str.get(1).str.lstrip()
    added_ps = set()
    for _, row in data[['changwat', 'CH_ID']].iterrows():
        # Hoisted: str(row['CH_ID']) was previously computed three times
        # per row.
        code = str(row['CH_ID'])
        if code not in added_ps:
            # NOTE: `unicode` is the Python 2 builtin; this module is
            # py2-only as written.
            p = Province(code=code, name=unicode(row['changwat']))
            db.session.add(p)
            added_ps.add(code)
    db.session.commit()
def handle_province(self):
    """Insert every entry of the global province_dict into the database.

    Each (key, value) pair becomes a Province(str(key), value) row,
    committed immediately. Any failure is logged and re-raised.
    """
    try:
        for code, label in province_dict.items():
            print("%s - %s" % (code, label))
            record = Province(str(code), label)
            self.db.add(record)
            self.db.commit()
    except Exception as e:
        # Log with full traceback, then propagate to the caller.
        Log.get_logger().exception(e)
        raise
    print("handle province ok!")
def scrape():
    """Scrape per-kabupaten COVID-19 figures for Sulawesi Selatan.

    Returns today's cached Data rows if present; otherwise scrapes the
    province site, persists new rows, and returns {"result": [...]}.
    """
    prov = __name__.split(".")[-1]

    # Ensure the province row exists.
    propinsi = Province.select().where(Province.nama_prov == "Sulawesi Selatan")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Sulawesi Selatan", alias=prov)
    else:
        propinsi = propinsi.get()

    sekarang = datetime.datetime.now().date()
    try:
        # BUG FIX: "Province.alias == prov" was passed as a second
        # positional argument to list() (a TypeError, silently swallowed
        # by the except below), so the daily cache was never hit. Both
        # conditions now live in .where().
        result = list(
            Data.select()
            .join(Province)
            .where(
                fn.date_trunc("day", Data.last_update) == sekarang,
                Province.alias == prov,
            )
        )
    except Exception:  # narrowed from a bare except; keep best-effort fallback
        result = []
    if len(result) > 0:
        return result

    link = "https://covid19.sulselprov.go.id"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=False)
        # The relevant markup is wrapped in HTML comments; strip the
        # markers so BeautifulSoup can see the content.
        data = re.sub(r"<!--", "", r.text)
        data = re.sub(r"-->", "", data)
        url = soup(data, "lxml")
        title = url.find("h4", attrs={"class": "text-danger"}).text
        # The update timestamp is the text after the last "-" in the title.
        pos = str(title).rfind("-")
        _last_update = str(title)[pos + 1:]
        table = url.find("table", attrs={"class": "table table-striped"})
        if table is not None:
            table_rows = table.find_all("tr")
            num_rows = len(table_rows)
            i = 0
            for tr in table_rows:
                td = tr.find_all("td")
                # Renamed loop var: the original comprehension shadowed `tr`.
                row = [cell.text.strip() for cell in td if cell.text.strip()]
                # Skip the header row and the trailing totals row.
                if i >= 1 and i < num_rows - 1:
                    list_item = {}
                    list_item["provinsi"] = "Sulawesi Selatan"
                    list_item["kode_kab_kota"] = "N/A"
                    list_item["kab_kota"] = row[1]
                    list_item["kecamatan"] = "N/A"
                    list_item["populasi"] = "N/A"
                    list_item["lat_kab_kota"] = "N/A"
                    list_item["long_kab_kota"] = "N/A"
                    list_item["n_odr"] = "N/A"
                    list_item["n_otg"] = "N/A"
                    list_item["n_odp"] = int(str(row[2]).rstrip())
                    list_item["n_pdp"] = int(str(row[3]).rstrip())
                    list_item["n_confirm"] = int(str(row[4]).rstrip())
                    list_item["n_meninggal"] = "N/A"
                    list_item["n_sembuh"] = "N/A"
                    list_item["last_update"] = _last_update
                    output["result"].append(list_item)

                    kabkota = KabupatenKota.select().where(
                        KabupatenKota.prov_id == propinsi,
                        KabupatenKota.nama == row[1],
                    )
                    if kabkota.count() < 1:
                        kabkota = KabupatenKota.create(prov_id=propinsi, nama=row[1])
                    else:
                        kabkota = kabkota.get()

                    # Persist only if this timestamp's row is not stored yet.
                    datum = Data.select().where(
                        Data.kabupaten == kabkota,
                        Data.last_update == dateparser.parse(_last_update),
                    )
                    if datum.count() < 1:
                        datum = Data.create(
                            kabupaten=kabkota,
                            n_odp=int(str(row[2]).rstrip()),
                            n_pdp=int(str(row[3]).rstrip()),
                            n_confirm=int(str(row[4]).rstrip()),
                            last_update=dateparser.parse(_last_update),
                        )
                i = i + 1
    return output
def scrape():
    """Scrape per-kabupaten COVID-19 figures for Kalimantan Selatan.

    Returns today's cached Data rows if present; otherwise fetches the
    province JSON endpoint, persists new rows, and returns
    {"result": [...]}.
    """
    prov = __name__.split(".")[-1]

    propinsi = Province.select().where(
        Province.nama_prov == "Kalimantan Selatan")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Kalimantan Selatan", alias=prov)
    else:
        propinsi = propinsi.get()

    sekarang = datetime.datetime.now().date()
    try:
        # BUG FIX: the alias filter was a second positional argument to
        # list() (TypeError, swallowed by the except), so the daily cache
        # lookup never worked. Both conditions now live in .where().
        result = list(
            Data.select()
            .join(Province)
            .where(
                fn.date_trunc("day", Data.last_update) == sekarang,
                Province.alias == prov,
            )
        )
    except Exception:  # narrowed from a bare except
        result = []
    if len(result) > 0:
        return result

    link = "https://corona.kalselprov.go.id/cov_map"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=True)
        json_data = json.loads(r.text)
        for data in json_data:
            list_item = {}
            list_item["provinsi"] = "Kalimantan Selatan"
            list_item["kode_kab_kota"] = data["code"]
            list_item["kab_kota"] = data["name"]
            list_item["kecamatan"] = "N/A"
            list_item["populasi"] = "N/A"
            list_item["lat_kab_kota"] = "N/A"
            list_item["long_kab_kota"] = "N/A"
            list_item["n_odr"] = "N/A"
            list_item["n_otg"] = "N/A"
            list_item["n_odp"] = data["cov_odp_count"]
            list_item["n_pdp"] = data["cov_pdp_count"]
            list_item["n_confirm"] = data["cov_positive_count"]
            list_item["n_meninggal"] = data["cov_died_count"]
            list_item["n_sembuh"] = data["cov_recovered_count"]
            list_item["last_update"] = "N/A"

            kabkota = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == data["name"])
            if kabkota.count() < 1:
                kabkota = KabupatenKota.create(prov_id=propinsi,
                                               nama=data["name"],
                                               kode=data["code"])
            else:
                kabkota = kabkota.get()

            # BUG FIX: the dedup check compared Data.last_update to the
            # exact current timestamp (datetime.now()), which essentially
            # never matches, so a duplicate row was inserted on every run.
            # Compare by day instead, mirroring the cache query above.
            datum = Data.select().where(
                Data.kabupaten == kabkota,
                fn.date_trunc("day", Data.last_update) == sekarang)
            if datum.count() < 1:
                datum = Data.create(
                    kabupaten=kabkota,
                    n_odp=data["cov_odp_count"],
                    n_pdp=data["cov_pdp_count"],
                    n_confirm=data["cov_positive_count"],
                    n_meninggal=data["cov_died_count"],
                    n_sembuh=data["cov_recovered_count"],
                    last_update=datetime.datetime.now(),
                )
            output["result"].append(list_item)
    return output
# Imports moved above the data literals (PEP 8: imports belong at the
# top, before module-level statements).
import random

import matplotlib.pyplot as plt

from models import Province, Case

# Input series fed to the Province model — presumably one value per day
# (e.g. daily new cases); confirm against models.Province.
a_InputData = [
    1, 5, 8, 11, 11, 37, 16, 14, 9, 14, 10, 15, 10,
    17, 11, 15, 6, 4, 8, 7, 6, 4, 9, 7, 6, 5,
]

# Reference series; leading zeros appear to mark days with no observed
# active-case count — confirm.
a_ActualActiveCases = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 71, 64, 65, 61, 59, 57,
]

print("Creating Province object...")
o_Province = Province(a_InputData)
def scrape():
    """Scrape per-kabupaten COVID-19 figures for Jawa Tengah.

    Returns today's cached Data rows if present; otherwise parses the
    province's HTML table via pandas, persists new rows, and returns
    {"result": [...]}.
    """
    prov = __name__.split(".")[-1]

    propinsi = Province.select().where(Province.nama_prov == "Jawa Tengah")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Jawa Tengah", alias=prov)
    else:
        propinsi = propinsi.get()

    sekarang = datetime.datetime.now().date()
    try:
        # BUG FIX: the alias filter was a second positional argument to
        # list() (TypeError, swallowed below), so the daily cache was
        # never hit. Both conditions now live in .where().
        result = list(
            Data.select()
            .join(Province)
            .where(
                fn.date_trunc("day", Data.last_update) == sekarang,
                Province.alias == prov,
            )
        )
    except Exception:  # narrowed from a bare except
        result = []
    if len(result) > 0:
        return result

    link = "https://corona.jatengprov.go.id/data"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=True)
        tree = html.fromstring(r.text)
        _last_update = tree.xpath(
            "//section[5]/div/div/div[1]/div/p/text()")[0].strip()
        table_elem = tree.xpath("//section[5]/div/div/div[2]/div/div/table")[0]
        table_str = etree.tostring(table_elem)
        # NOTE: etree.tostring returns bytes, so the != "" comparison is
        # always true; kept for parity — the None check is what matters.
        if table_str is not None and table_str != "":
            df = pd.read_html(table_str)[0]
            # Total confirmed = recovered + deceased + hospitalized.
            df["positif"] = df["Positif: Sembuh"] + df[
                "Positif: Meninggal"] + df["Positif: Dirawat"]
            table = df.to_dict("records")
            for row in table:
                list_item = {}
                list_item["provinsi"] = "Jawa Tengah"
                list_item["kode_kab_kota"] = "N/A"
                list_item["kab_kota"] = row["Kabupaten/Kota"]
                list_item["kecamatan"] = "N/A"
                list_item["populasi"] = "N/A"
                list_item["lat_kab_kota"] = "N/A"
                list_item["long_kab_kota"] = "N/A"
                list_item["n_odr"] = "N/A"
                list_item["n_otg"] = "N/A"
                list_item["n_odp"] = row["ODP: Proses"]
                list_item["n_pdp"] = row["PDP: Dirawat"]
                list_item["n_confirm"] = row["positif"]
                list_item["n_meninggal"] = row["Positif: Meninggal"]
                list_item["n_sembuh"] = row["Positif: Sembuh"]
                list_item["last_update"] = _last_update

                kabkota = KabupatenKota.select().where(
                    KabupatenKota.prov_id == propinsi,
                    KabupatenKota.nama == list_item["kab_kota"],
                )
                if kabkota.count() < 1:
                    kabkota = KabupatenKota.create(prov_id=propinsi,
                                                   nama=list_item["kab_kota"])
                else:
                    kabkota = kabkota.get()

                datum = Data.select().where(
                    Data.kabupaten == kabkota,
                    Data.last_update == dateparser.parse(_last_update),
                )
                if datum.count() < 1:
                    datum = Data.create(
                        kabupaten=kabkota,
                        # BUG FIX: n_odp is scraped into list_item but was
                        # omitted from the stored row; persist it too.
                        n_odp=list_item["n_odp"],
                        n_pdp=list_item["n_pdp"],
                        n_confirm=list_item["n_confirm"],
                        n_meninggal=list_item["n_meninggal"],
                        n_sembuh=list_item["n_sembuh"],
                        last_update=dateparser.parse(_last_update),
                    )
                output["result"].append(list_item)
    return output
import datetime # Start the comments. # We need to save these objects before referencing them later. henry_hiker = Hiker( 'Henry', 'Hiker', "hhiker1", 'France', '*****@*****.**', 10, "https://res.cloudinary.com/c7oud0311/image/upload/v1594609854/project3/profile1_tbjthv.jpg" ).save() joe_jogger = Hiker( 'Joe', 'Jogger', "jjogger1", 'UK', '*****@*****.**', 12, "https://res.cloudinary.com/c7oud0311/image/upload/v1594654152/project3/profile2_vcog5c.jpg" ).save() location_ids = Location.objects.bulk_create([ Location('Ireland', province=[Province(state='Leinster', town=Town(town='Carlow'))]), Location('Ireland', province=[Province(state='Leinster', town=Town(town='Dublin'))]), Location('Ireland', province=[Province(state='Leinster', town=Town(town='Kilkenny'))]), Location('Ireland', province=[Province(state='Leinster', town=Town(town='Wicklow'))]), Location('Ireland', province=[Province(state='Munster', town=Town(town='Cork'))]), Location('Ireland', province=[Province(state='Munster', town=Town(town='Killarney'))]), # 5 Location('Ireland', province=[Province(state='Munster', town=Town(town='Limerick'))]), Location('Ireland',
def scrape():
    """Scrape per-kabupaten COVID-19 figures for Aceh.

    Returns today's cached Data rows if present; otherwise fetches the
    province's JSON map endpoint, persists new rows, and returns
    {"result": [...]}.
    """
    prov = __name__.split(".")[-1]

    propinsi = Province.select().where(Province.nama_prov == "Aceh")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Aceh", alias=prov)
    else:
        propinsi = propinsi.get()

    sekarang = datetime.datetime.now().date()
    try:
        # BUG FIX: the alias filter was a second positional argument to
        # list() (TypeError, swallowed below), so the daily cache was
        # never hit. Both conditions now live in .where().
        result = list(
            Data.select()
            .join(Province)
            .where(
                fn.date_trunc("day", Data.last_update) == sekarang,
                Province.alias == prov,
            )
        )
    except Exception:  # narrowed from a bare except
        result = []
    if len(result) > 0:
        return result

    link = "https://covid.bravo.siat.web.id/json/peta"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=True)
        json_data = json.loads(r.text)
        for data in json_data:
            list_item = {}
            list_item["provinsi"] = "Aceh"
            list_item["kode_kab_kota"] = "N/A"
            list_item["kab_kota"] = data["namaKabupaten"]
            list_item["kecamatan"] = "N/A"
            list_item["populasi"] = "N/A"
            list_item["lat_kab_kota"] = data["latitude"]
            list_item["long_kab_kota"] = data["longitude"]
            list_item["n_odr"] = "N/A"
            list_item["n_otg"] = "N/A"
            list_item["n_odp"] = data["odp"]
            list_item["n_pdp"] = data["pdp"]
            list_item["n_confirm"] = data["positif"]
            list_item["n_meninggal"] = data["positifMeninggal"]
            list_item["n_sembuh"] = data["positifSembuh"]
            list_item["last_update"] = data["updateDate"]

            kabkota = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == data["namaKabupaten"],
            )
            if kabkota.count() < 1:
                kabkota = KabupatenKota.create(
                    prov_id=propinsi,
                    nama=data["namaKabupaten"],
                    lat=data["latitude"],
                    lon=data["longitude"],
                )
            else:
                kabkota = kabkota.get()

            # Persist only if this timestamp's row is not stored yet.
            datum = Data.select().where(
                Data.kabupaten == kabkota,
                Data.last_update == dateparser.parse(data["updateDate"]),
            )
            if datum.count() < 1:
                datum = Data.create(
                    kabupaten=kabkota,
                    n_odp=data["odp"],
                    n_pdp=data["pdp"],
                    n_confirm=data["positif"],
                    n_meninggal=data["positifMeninggal"],
                    n_sembuh=data["positifSembuh"],
                    last_update=dateparser.parse(data["updateDate"]),
                )
            output["result"].append(list_item)
    return output
def scrape():
    # Scrape per-kecamatan COVID-19 figures for Daerah Istimewa
    # Yogyakarta by driving a headless Chrome through the province's
    # postcode lookup form, one postcode per CSV row.
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(
        Province.nama_prov == "Daerah Istimewa Yogyakarta"
    )
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Daerah Istimewa Yogyakarta", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    try:
        # NOTE(review): "Province.alias == prov" is a second positional
        # argument to list(), not part of .where() — list() raises
        # TypeError here, so this cache lookup always falls through to
        # the except branch and re-scrapes.
        result = list(
            Data.select()
            .join(Province)
            .where(fn.date_trunc("day", Data.last_update) == sekarang),
            Province.alias == prov,
        )
    except:  # NOTE(review): bare except — also swallows the TypeError above
        result = []
    if len(result) > 0:
        return result
    # chromedriver configuration (headless, fixed window size)
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--window-size=1420,1080")
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    browser = webdriver.Chrome(chrome_options=chrome_options)
    # XPaths of the lookup form's hidden input, postcode field and
    # submit button.
    hidden = "/html/body/div[2]/div[2]/div/div/form/input[1]"
    kodepos = '//*[@id="fname"]'
    button = "/html/body/div[2]/div[2]/div/div/form/button"
    directory = Path().absolute()
    kodepos_df = pd.read_csv(str(directory)+"/data/Data_KodePos_Kecamatan_DIY.csv", delimiter=";")
    output = {}
    output["result"] = []
    for index, row in kodepos_df.iterrows():
        # base URL — reloaded for every postcode to reset the form
        link = "https://sebaran-covid19.jogjaprov.go.id/kodepos"
        browser.get(link)
        kode_pos = str(row["kode_pos"])
        e = browser.find_element_by_xpath(hidden).get_attribute("value")
        e = browser.find_element_by_xpath(kodepos)
        e.send_keys(kode_pos)
        e = browser.find_element_by_xpath(button)
        e.click()
        # time.sleep(5)
        data = browser.page_source
        # print(data)
        url = soup(data, "lxml")
        odp = url.find("b", {"id": "odp"})
        pdp = url.find("b", {"id": "pdp"})
        positif = url.find("b", {"id": "positif"})
        last_update_blok = url.find("div", {"class": "dataupdate"})
        populasi = url.find("b", {"id": "populasi"})
        if populasi is None:
            # Fallback: some pages render the population in <strong>.
            populasi = url.find("strong", {"id": "populasi"})
        # The update date is the <p> without a style attribute inside
        # the "dataupdate" block.
        for item in last_update_blok.contents:
            if item.name == "p":
                if item.has_attr("style") == False:
                    # NOTE(review): if no matching <p> exists,
                    # _last_update is left over from the previous
                    # iteration (NameError on the first) — confirm the
                    # page always carries this element.
                    _last_update = item.text.replace("Data Update ", "").rstrip()
        list_item = {}
        list_item["provinsi"] = "Daerah Istimewa Yogyakarta"
        list_item["kode_kab_kota"] = str(row["kode_wilayah"])
        list_item["kab_kota"] = str(row["kabupaten_kota"])
        list_item["kecamatan"] = str(row["nama_kecamatan"])
        list_item["populasi"] = str(populasi.text).rstrip()
        list_item["lat_kab_kota"] = "N/A"
        list_item["long_kab_kota"] = "N/A"
        list_item["n_odr"] = "N/A"
        list_item["n_otg"] = "N/A"
        list_item["n_odp"] = int(str(odp.text).rstrip())
        list_item["n_pdp"] = int(str(pdp.text).rstrip())
        list_item["n_confirm"] = int(str(positif.text).rstrip())
        list_item["n_meninggal"] = "N/A"
        list_item["n_sembuh"] = "N/A"
        list_item["last_update"] = _last_update
        kabkota = KabupatenKota.select().where(
            KabupatenKota.prov_id == propinsi,
            KabupatenKota.nama == str(row["kabupaten_kota"]),
        )
        if kabkota.count() < 1:
            kabkota = KabupatenKota.create(
                prov_id=propinsi,
                nama=str(row["kabupaten_kota"]),
                kode=str(row["kode_wilayah"]),
            )
        else:
            kabkota = kabkota.get()
        # Persist only if this timestamp's row is not stored yet.
        datum = Data.select().where(
            Data.kabupaten == kabkota,
            Data.last_update == dateparser.parse(_last_update),
        )
        if datum.count() < 1:
            datum = Data.create(
                kabupaten=kabkota,
                n_odp=int(str(odp.text).rstrip()),
                n_pdp=int(str(pdp.text).rstrip()),
                n_confirm=int(str(positif.text).rstrip()),
                last_update=dateparser.parse(_last_update),
            )
        output["result"].append(list_item)
    # NOTE(review): this cleanup is skipped if anything above raises; a
    # try/finally around the loop would guarantee browser shutdown.
    browser.stop_client()
    browser.close()
    browser.quit()
    return output
def scrape():
    """Scrape per-kabupaten COVID-19 figures for Bali.

    Returns today's cached Data rows if present; otherwise parses the
    province dashboard table, persists new rows, and returns
    {"result": [...]}.
    """
    prov = __name__.split(".")[-1]

    propinsi = Province.select().where(Province.nama_prov == "Bali")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Bali", alias=prov)
    else:
        propinsi = propinsi.get()

    sekarang = datetime.datetime.now().date()
    try:
        # BUG FIX: the alias filter was a second positional argument to
        # list() (TypeError, swallowed below), so the daily cache was
        # never hit. Both conditions now live in .where().
        result = list(
            Data.select()
            .join(Province)
            .where(
                fn.date_trunc("day", Data.last_update) == sekarang,
                Province.alias == prov,
            )
        )
    except Exception:  # narrowed from a bare except
        result = []
    if len(result) > 0:
        return result

    link = "https://pendataan.baliprov.go.id/"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=True)
        url = soup(r.text, "lxml")
        # NOTE: attrs here is a set literal, not a dict — kept as-is
        # since it matched in production; confirm bs4 semantics before
        # changing.
        con = url.find_all("div", attrs={"card-header"})
        title = con[6].find("h3").text
        # The update timestamp is the text after "Dengan " in the title.
        pos = str(title).rfind("Dengan ")
        _last_update = str(title)[pos + 7:]
        table = url.find("table", attrs={"class": "table"})
        if table is not None:
            table_rows = table.find_all("tr")
            num_rows = len(table_rows)
            i = 0
            for tr in table_rows:
                td = tr.find_all("td")
                # Renamed loop var: the original comprehension shadowed `tr`.
                row = [cell.text.strip() for cell in td if cell.text.strip()]
                # Skip the header row and the trailing totals row.
                if i >= 1 and i < num_rows - 1:
                    if row:
                        list_item = {}
                        list_item["provinsi"] = "Bali"
                        list_item["kode_kab_kota"] = "N/A"
                        list_item["kab_kota"] = row[0]
                        list_item["kecamatan"] = "N/A"
                        list_item["populasi"] = "N/A"
                        list_item["lat_kab_kota"] = "N/A"
                        list_item["long_kab_kota"] = "N/A"
                        list_item["n_odr"] = "N/A"
                        list_item["n_otg"] = "N/A"
                        list_item["n_odp"] = "N/A"
                        list_item["n_pdp"] = int(str(row[7]).rstrip())
                        list_item["n_confirm"] = int(str(row[6]).rstrip())
                        list_item["n_meninggal"] = int(str(row[9]).rstrip())
                        list_item["n_sembuh"] = int(str(row[8]).rstrip())
                        list_item["last_update"] = _last_update

                        kabkota = KabupatenKota.select().where(
                            KabupatenKota.prov_id == propinsi,
                            KabupatenKota.nama == row[0],
                        )
                        if kabkota.count() < 1:
                            kabkota = KabupatenKota.create(prov_id=propinsi,
                                                           nama=row[0])
                        else:
                            kabkota = kabkota.get()

                        # Persist only if this timestamp's row is missing.
                        datum = Data.select().where(
                            Data.kabupaten == kabkota,
                            Data.last_update == dateparser.parse(_last_update),
                        )
                        if datum.count() < 1:
                            datum = Data.create(
                                kabupaten=kabkota,
                                n_pdp=int(str(row[7]).rstrip()),
                                n_confirm=int(str(row[6]).rstrip()),
                                n_meninggal=int(str(row[9]).rstrip()),
                                n_sembuh=int(str(row[8]).rstrip()),
                                last_update=dateparser.parse(_last_update),
                            )
                        output["result"].append(list_item)
                i = i + 1
    return output
def scrape():
    """Scrape per-kabupaten COVID-19 figures for Banten.

    The data is embedded in an inline script as "pieSeries.data = [...]";
    it is extracted with a regex, persisted, and returned as
    {"result": [...]} (or today's cached rows on a cache hit).
    """
    prov = __name__.split(".")[-1]

    propinsi = Province.select().where(Province.nama_prov == "Banten")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Banten", alias=prov)
    else:
        propinsi = propinsi.get()

    sekarang = datetime.datetime.now().date()
    try:
        # BUG FIX: the alias filter was a second positional argument to
        # list() (TypeError, swallowed below), so the daily cache was
        # never hit. Both conditions now live in .where().
        result = list(
            Data.select()
            .join(Province)
            .where(
                fn.date_trunc("day", Data.last_update) == sekarang,
                Province.alias == prov,
            )
        )
    except Exception:  # narrowed from a bare except
        result = []
    if len(result) > 0:
        return result

    link = "https://infocorona.bantenprov.go.id/"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=True)
        url = soup(r.text, "lxml")
        script = url.find_all("script")
        json_data = ""
        for item in script:
            # Pull the JS array assigned to pieSeries.data out of the
            # inline script.
            if re.search(r"pieSeries.data\s\=\s(.*)\;", str(item)):
                var_data = re.findall(r"pieSeries.data\s\=\s(.*)\;", str(item))
                json_data = json.loads(str(var_data[0]))
        # If no script matched, json_data is "" and the loop below is a no-op.
        for data in json_data:
            list_item = {}
            list_item["provinsi"] = "Banten"
            list_item["kode_kab_kota"] = "N/A"
            list_item["kab_kota"] = data["title"]
            list_item["kecamatan"] = "N/A"
            list_item["populasi"] = "N/A"
            list_item["lat_kab_kota"] = data["latitude"]
            list_item["long_kab_kota"] = data["longitude"]
            list_item["n_odr"] = "N/A"
            list_item["n_otg"] = "N/A"
            list_item["n_odp"] = data["pieData"][0]["value"]
            list_item["n_pdp"] = data["pieData"][1]["value"]
            list_item["n_confirm"] = data["pieData"][2]["value"]
            list_item["n_meninggal"] = "N/A"
            list_item["n_sembuh"] = "N/A"
            list_item["last_update"] = "N/A"

            kabkota = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == data["title"])
            if kabkota.count() < 1:
                kabkota = KabupatenKota.create(
                    prov_id=propinsi,
                    nama=data["title"],
                    lat=data["latitude"],
                    lon=data["longitude"],
                    populasi="",
                )
            else:
                kabkota = kabkota.get()

            # BUG FIX: the dedup check compared Data.last_update to the
            # exact current timestamp, which essentially never matches,
            # so a duplicate row was inserted every run. Compare by day.
            datum = Data.select().where(
                Data.kabupaten == kabkota,
                fn.date_trunc("day", Data.last_update) == sekarang)
            if datum.count() < 1:
                datum = Data.create(
                    kabupaten=kabkota,
                    n_odp=data["pieData"][0]["value"],
                    n_pdp=data["pieData"][1]["value"],
                    n_confirm=data["pieData"][2]["value"],
                    last_update=datetime.datetime.now(),
                )
            output["result"].append(list_item)
    return output
def scrape():
    """Scrape per-kabupaten COVID-19 figures for Nusa Tenggara Barat.

    Returns today's cached Data rows if present; otherwise parses the
    province's list-data table, persists new rows, and returns
    {"result": [...]}.
    """
    prov = __name__.split(".")[-1]

    propinsi = Province.select().where(
        Province.nama_prov == "Nusa Tenggara Barat")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Nusa Tenggara Barat", alias=prov)
    else:
        propinsi = propinsi.get()

    sekarang = datetime.datetime.now().date()
    try:
        # BUG FIX: the alias filter was a second positional argument to
        # list() (TypeError, swallowed below), so the daily cache was
        # never hit. Both conditions now live in .where().
        result = list(
            Data.select()
            .join(Province)
            .where(
                fn.date_trunc("day", Data.last_update) == sekarang,
                Province.alias == prov,
            )
        )
    except Exception:  # narrowed from a bare except
        result = []
    if len(result) > 0:
        return result

    # Browser-like headers (including a captured session cookie) the NTB
    # site requires to serve the table.
    headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "en-US,en;q=0.9",
        "cache-control": "max-age=0",
        "cookie": "XSRF-TOKEN=eyJpdiI6IjJGSjNJWmxJS1AzNExHQ1poVDZPeWc9PSIsInZhbHVlIjoiTVEvWTVSWHZJSUtyY0RaalFPa2tzZW1hWmJYN0ZucGtEMXFtNGRRN3RLQXloVkxwNC90VEZMZHozYk1kV1cvLyIsIm1hYyI6ImVlM2NjOTg4YTA2YzMxZjllZGE3MGM0Njk1YTJmZGU1Nzc3ZGE4MmM1MWRlNTg4YWFjZWQ4MWQxZmUzMzkyNzEifQ%3D%3D; laravel_session=eyJpdiI6InN3a2JkdGJPcWMvNmVxbmxBZGxCK2c9PSIsInZhbHVlIjoiM1dwZmdmUHdNY3RwWG9oVXJqM2dYQmZSWnlEakY3TkVNZ2Mra21RY3hLN3V0UGMwQWxVbzhSbU5NNjR0aHdyeiIsIm1hYyI6ImQxNzYyMWI2MjhkMDRlYTY1Mjc4NDFhMTRkMzZiNDliNjdkY2NiNDkxZTY1NTRjZTIxZGVjZGE1YjkzZmUyZWYifQ%3D%3D",
        "referer": "https://corona.ntbprov.go.id/",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "same-origin",
        "sec-fetch-user": "******",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
    }
    link = "https://corona.ntbprov.go.id/list-data"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=False, headers=headers)
        url = soup(r.text, "lxml")
        table = url.find("table",
                         attrs={"class": "table table-bordered table-striped"})
        if table is not None:
            th = table.find("th")
            # Header cell reads "…, <date> Pukul <time> …"; extract the
            # date between the comma and "Pukul".
            info_date = th.text.replace("\n", "").replace(" ", "")
            pos_l = info_date.find(",")
            pos_r = info_date.rfind("Pukul")
            _last_update = info_date[pos_l + 1:pos_r]
            table_rows = table.find_all("tr")
            num_rows = len(table_rows)
            i = 0
            for tr in table_rows:
                td = tr.find_all("td")
                # Renamed loop var: the original comprehension shadowed `tr`.
                row = [cell.text.strip() for cell in td if cell.text.strip()]
                # Skip the header row and the trailing totals row.
                if i >= 1 and i < num_rows - 1:
                    if row:
                        # NOTE: replace(" ", " ") is a no-op as written;
                        # presumably meant to collapse doubled spaces —
                        # kept byte-identical to preserve stored names.
                        nama_kab = str(row[0]).replace("\n", "").replace(" ", " ")
                        list_item = {}
                        list_item["provinsi"] = "Nusa Tenggara Barat"
                        list_item["kode_kab_kota"] = "N/A"
                        list_item["kab_kota"] = nama_kab
                        list_item["kecamatan"] = "N/A"
                        list_item["populasi"] = "N/A"
                        list_item["lat_kab_kota"] = "N/A"
                        list_item["long_kab_kota"] = "N/A"
                        list_item["n_odr"] = "N/A"
                        list_item["n_otg"] = int(str(row[5]).rstrip())
                        list_item["n_odp"] = int(str(row[8]).rstrip())
                        list_item["n_pdp"] = int(str(row[11]).rstrip())
                        list_item["n_confirm"] = int(str(row[14]).rstrip())
                        list_item["n_meninggal"] = int(str(row[16]).rstrip())
                        list_item["n_sembuh"] = int(str(row[17]).rstrip())
                        list_item["last_update"] = _last_update

                        kabkota = KabupatenKota.select().where(
                            KabupatenKota.prov_id == propinsi,
                            # BUG FIX: the lookup previously matched on
                            # row[1] (a numeric data column) while rows
                            # are created with the normalized name from
                            # row[0], so the lookup never matched and a
                            # duplicate kabupaten was created each run.
                            KabupatenKota.nama == nama_kab,
                        )
                        if kabkota.count() < 1:
                            kabkota = KabupatenKota.create(
                                prov_id=propinsi,
                                nama=nama_kab,
                            )
                        else:
                            kabkota = kabkota.get()

                        # Persist only if this timestamp's row is missing.
                        datum = Data.select().where(
                            Data.kabupaten == kabkota,
                            Data.last_update == dateparser.parse(_last_update),
                        )
                        if datum.count() < 1:
                            datum = Data.create(
                                kabupaten=kabkota,
                                n_otg=int(str(row[5]).rstrip()),
                                n_odp=int(str(row[8]).rstrip()),
                                n_pdp=int(str(row[11]).rstrip()),
                                n_confirm=int(str(row[14]).rstrip()),
                                n_meninggal=int(str(row[16]).rstrip()),
                                n_sembuh=int(str(row[17]).rstrip()),
                                last_update=dateparser.parse(_last_update),
                            )
                        output["result"].append(list_item)
                i = i + 1
    return output
def scrape():
    """Scrape per-kabupaten COVID-19 figures for Sulawesi Barat.

    Returns today's cached Data rows if present; otherwise parses the
    province's data table, persists new rows, and returns
    {"result": [...]}.
    """
    prov = __name__.split(".")[-1]

    propinsi = Province.select().where(Province.nama_prov == "Sulawesi Barat")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Sulawesi Barat", alias=prov)
    else:
        propinsi = propinsi.get()

    sekarang = datetime.datetime.now().date()
    try:
        # BUG FIX: the alias filter was a second positional argument to
        # list() (TypeError, swallowed below), so the daily cache was
        # never hit. Both conditions now live in .where().
        result = list(
            Data.select()
            .join(Province)
            .where(
                fn.date_trunc("day", Data.last_update) == sekarang,
                Province.alias == prov,
            )
        )
    except Exception:  # narrowed from a bare except
        result = []
    if len(result) > 0:
        return result

    link = "https://covid19.sulbarprov.go.id/utama/data"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=True)
        url = soup(r.text, "lxml")
        table = url.find("table", attrs={"class": "table-responsive"})
        if table is not None:
            table_rows = table.find_all("tr")
            num_rows = len(table_rows)
            i = 0
            for tr in table_rows:
                td = tr.find_all("td")
                # Renamed loop var: the original comprehension shadowed `tr`.
                row = [cell.text.strip() for cell in td if cell.text.strip()]
                # Skip the header row and the trailing totals row.
                if i >= 1 and i < num_rows - 1:
                    if row:
                        # Deaths are spread over four columns (per-status
                        # death counts); sum them for the total.
                        meninggal = (int(str(row[5]).rstrip()) +
                                     int(str(row[9]).rstrip()) +
                                     int(str(row[12]).rstrip()) +
                                     int(str(row[18]).rstrip()))
                        list_item = {}
                        list_item["provinsi"] = "Sulawesi Barat"
                        list_item["kode_kab_kota"] = "N/A"
                        list_item["kab_kota"] = str(row[1]).rstrip()
                        list_item["kecamatan"] = "N/A"
                        list_item["populasi"] = "N/A"
                        list_item["lat_kab_kota"] = "N/A"
                        list_item["long_kab_kota"] = "N/A"
                        list_item["n_odr"] = "N/A"
                        list_item["n_otg"] = int(str(row[6]).rstrip())
                        list_item["n_odp"] = int(str(row[2]).rstrip())
                        list_item["n_pdp"] = int(str(row[10]).rstrip())
                        list_item["n_confirm"] = int(str(row[14]).rstrip())
                        list_item["n_meninggal"] = meninggal
                        list_item["n_sembuh"] = int(str(row[17]).rstrip())
                        list_item["last_update"] = "N/A"

                        kabkota = KabupatenKota.select().where(
                            KabupatenKota.prov_id == propinsi,
                            KabupatenKota.nama == row[1],
                        )
                        if kabkota.count() < 1:
                            kabkota = KabupatenKota.create(prov_id=propinsi,
                                                           nama=row[1])
                        else:
                            kabkota = kabkota.get()

                        # BUG FIX: the dedup check compared Data.last_update
                        # to the exact current timestamp, which essentially
                        # never matches, so a duplicate row was inserted on
                        # every run. Compare by day instead.
                        datum = Data.select().where(
                            Data.kabupaten == kabkota,
                            fn.date_trunc("day", Data.last_update) == sekarang,
                        )
                        if datum.count() < 1:
                            datum = Data.create(
                                kabupaten=kabkota,
                                n_otg=int(str(row[6]).rstrip()),
                                n_odp=int(str(row[2]).rstrip()),
                                n_pdp=int(str(row[10]).rstrip()),
                                n_confirm=int(str(row[14]).rstrip()),
                                n_meninggal=meninggal,
                                n_sembuh=int(str(row[17]).rstrip()),
                                last_update=datetime.datetime.now(),
                            )
                        output["result"].append(list_item)
                i = i + 1
    return output