def get_monthly_top_commented_articles(top=10, since=None):
    """Return a per-month list of the `top` most-commented articles.

    Output shape: list of (month, [(asset_url, comment_count), ...]) pairs,
    padded with empty lists for months that had no comments (via
    fill_output_with_default_values).

    :param top: max number of articles to report per month.
    :param since: optional lower bound on Comment.created.
    """
    # Distinct month buckets that have at least one comment.
    months = Comment.select(fn.date_trunc('month', Comment.created))
    if since:
        months = months.where(Comment.created >= since)
    months = months.group_by(fn.date_trunc('month', Comment.created)).order_by(
        SQL('date_trunc').asc()).tuples()
    # One empty bucket per month; filled below.
    month_top_commented_articles_map = {m[0]: [] for m in months}
    # NOTE(review): this issues one Asset.get_by_id query per result row
    # (N+1); consider joining Asset into the per-month query instead.
    output_formatter = lambda mtc: (Asset.get_by_id(mtc[1]).url, mtc[2])
    for month in months:
        # NOTE(review): `month` is a 1-tuple from .tuples(); the comparison
        # below uses the tuple itself while the map key uses month[0] —
        # confirm peewee renders the tuple as intended, else use month[0].
        month_top_commented_articles_map[month[0]].extend(
            list(
                map(
                    output_formatter,
                    Comment.select(
                        fn.date_trunc('month', Comment.created),
                        Comment.asset_id,
                        fn.count(Comment.id)).group_by(
                            fn.date_trunc('month', Comment.created),
                            Comment.asset_id).where(
                                fn.date_trunc('month', Comment.created) == month
                            ).order_by((SQL('date_trunc')),
                                       (SQL('count')).desc()).limit(
                                           int(top)).tuples())))
    # NOTE(review): min()/max() raise ValueError when there are no comments
    # at all — callers presumably guarantee a non-empty table; verify.
    first_month = min(month_top_commented_articles_map.keys())
    last_month = max(month_top_commented_articles_map.keys())
    months = get_week_or_month_counter(metric='month',
                                       first_metric_value=first_month,
                                       last_metric_value=last_month)
    monthly_top_commented_articles = list(
        month_top_commented_articles_map.items())
    ret = fill_output_with_default_values(
        metric_counter=months,
        output=monthly_top_commented_articles,
        default_value=[])
    return ret
def get_weekly_comments_count(since=None):
    """Return weekly approved/rejected comment counts merged into one series.

    :param since: optional lower bound applied to both comment tables.
    """
    approved_week = fn.date_trunc('week', Comment.created)
    rejected_week = fn.date_trunc('week', RejectedComment.created)
    approved = (Comment
                .select(approved_week, fn.count(Comment.id))
                .group_by(approved_week)
                .order_by(SQL('date_trunc').asc()))
    rejected = (RejectedComment
                .select(rejected_week, fn.count(RejectedComment.id))
                .group_by(rejected_week)
                .order_by(SQL('date_trunc').asc()))
    if since:
        approved = approved.where(Comment.created >= since)
        rejected = rejected.where(RejectedComment.created >= since)
    approved = approved.tuples()
    rejected = rejected.tuples()
    # Bound the week counter by the earliest/latest bucket across both series.
    first_week = min(approved[0][0], rejected[0][0])
    last_week = max(approved[-1][0], rejected[-1][0])
    weeks = get_week_or_month_counter(metric='week',
                                      first_metric_value=first_week,
                                      last_metric_value=last_week)
    return merge_approved_rejected_list(metric_counter=weeks,
                                        approved=approved,
                                        rejected=rejected)
def get_yearly_unique_commenters_count():
    """Return [(ISO date of year start, distinct commenter count), ...]."""
    year_bucket = fn.date_trunc('year', Comment.created)
    query = (Comment
             .select(year_bucket, fn.count(fn.Distinct(Comment.commenter_id)))
             .group_by(year_bucket)
             .order_by(SQL('date_trunc').asc()))
    results = []
    for bucket_start, commenters in query.tuples():
        results.append((bucket_start.date().isoformat(), commenters))
    return results
def get_curr_year_top_commenters(top=3):
    """Return the `top` commenters of the current calendar year.

    Each element is (commenter name, comment count), most active first.
    """
    year_bucket = fn.date_trunc('year', Comment.created)
    current_year_start = arrow.utcnow().span('year')[0].date()
    query = (Comment
             .select(year_bucket, Comment.commenter, fn.count(Comment.id))
             .group_by(year_bucket, Comment.commenter)
             .where(Comment.created >= current_year_start)
             .order_by(SQL('count').desc())
             .limit(int(top)))
    ranking = []
    for _year, commenter, count in query.tuples():
        # `commenter` is subscripted with 'name' — presumably a JSON/dict
        # column; verify against the Comment model definition.
        ranking.append((commenter['name'], count))
    return ranking
def aggregate_fb_board(self):
    """Aggregate Facebook posts per keyword group and day, and upsert the
    daily count/positive/negative totals into webapp_keyword_stat.

    Reads rows newer than self.fromts from webapp_facebook_main joined to
    webapp_keyword_match on url == guid.
    """
    config_list = webapp_facebook_main\
        .select(webapp_keyword_match.kwgrp,
                fn.date_trunc('day', webapp_facebook_main.date),
                fn.Count(1).alias('count'),
                fn.Sum(webapp_facebook_main.positive).alias('positive'),
                fn.Sum(webapp_facebook_main.negative).alias('negative'))\
        .join(webapp_keyword_match,
              on=(webapp_facebook_main.url == webapp_keyword_match.guid).alias('km'))\
        .where(webapp_facebook_main.tm >= self.fromts,
               webapp_keyword_match.tm >= self.fromts)\
        .group_by(webapp_keyword_match.kwgrp,
                  fn.date_trunc('day', webapp_facebook_main.date))
    for c in config_list:
        # Fix: peewee's get_or_create returns an (instance, created) tuple;
        # the original assigned attributes on the tuple itself.
        wks, _created = webapp_keyword_stat.get_or_create(
            kwgrp=c.km.kwgrp, date=c.date_trunc, source='facebook')
        wks.cnt = c.count
        wks.positive = c.positive
        wks.negative = c.negative
        wks.save()
def getStatistics(self):
    """Return scanning statistics: DB entry count, max/avg per-minute scan
    rate, and the total scanned-binary count (JSON-encoded).
    """
    # Queue depth is computed but intentionally not reported; kept because
    # get_binary_queue() may lazily initialize the queue.
    bins_in_queue = self.get_binary_queue().qsize()
    entries_in_db = len(BinaryDetonationResult().select())
    # Total binaries that have a last_scan_date set.
    scanned_bins = BinaryDetonationResult().select(
        fn.COUNT(BinaryDetonationResult.md5)).where(
            BinaryDetonationResult.last_scan_date)
    # Scans per minute, grouped on the minute bucket of last_scan_date.
    rates = BinaryDetonationResult().select(
        fn.COUNT(BinaryDetonationResult.md5).alias('rate')).where(
            BinaryDetonationResult.last_scan_date).group_by(
                fn.date_trunc(
                    'minute',
                    BinaryDetonationResult.last_scan_date)).order_by(
                        SQL('rate')).dicts()
    sum_rates = 0.0
    count = 0
    minrate = -1  # tracked but not reported; -1 means "unset"
    maxrate = 0
    for rate in rates:
        therate = rate['rate']
        sum_rates += therate
        # Fix: original compared with `is not -1` (identity on an int) and
        # its max branch read `... else therate`, which always overwrote the
        # running maximum with the current value.
        if minrate == -1 or therate < minrate:
            minrate = therate
        if therate > maxrate:
            maxrate = therate
        count += 1
    avgrate = sum_rates / count if count > 0 else 1
    return {
        "dbentries": str(entries_in_db),
        "Maximum 1 minute scanning rate": str(maxrate),
        "Average 1 minute scanning rate": str(avgrate),
        "scanned": str(json.dumps(scanned_bins.dicts().get()))
    }
def get_monthly_unique_commenters_count(since=None):
    """Return distinct commenter counts per month, padded with defaults for
    months that have no data.

    :param since: optional lower bound on Comment.created.
    """
    month_bucket = fn.date_trunc('month', Comment.created)
    counts = (Comment
              .select(month_bucket,
                      fn.count(fn.Distinct(Comment.commenter_id)))
              .group_by(month_bucket)
              .order_by(SQL('date_trunc').asc()))
    if since:
        counts = counts.where(Comment.created >= since)
    counts = counts.tuples()
    # Results are ordered by month, so the span is first row .. last row.
    months = get_week_or_month_counter(metric='month',
                                       first_metric_value=counts[0][0],
                                       last_metric_value=counts[-1][0])
    return fill_output_with_default_values(metric_counter=months,
                                           output=counts)
def show():
    """Return the taken time slots for a given date.

    Route shape: /api/v1/timeslots/show?d=<day>&m=<month>&y=<year>
    A slot counts as taken when its order already holds exactly 12 images.
    """
    day = int(request.args.get('d'))
    month = int(request.args.get('m'))
    year = int(request.args.get('y'))
    selected_day = datetime(year, month, day)
    full_orders = (Order.select()
                   .join(Image)
                   .where(fn.date_trunc('day', Order.start_time) == selected_day)
                   .group_by(Order.id)
                   .having(fn.count(Image.id) == 12))
    return jsonify({'slotsTaken': [order.start_time for order in full_orders]})
def user_add_cart(recipe_id):
    """Add a recipe with its selected ingredients to the current user's cart.

    Enforces the weekly meal limit from the user's subscription and rejects
    duplicates already in the (not-checked-out) cart for today.
    """
    user_id = get_jwt_identity()
    user = User.get_or_none(User.id == user_id)
    ingredients = request.json["selectedIngredients"]
    recipe = Recipe.get_or_none(Recipe.id == recipe_id)
    # Recipes picked so far this week (week start .. tomorrow).
    subscription_recipes = Subscription_Recipe.select().where(
        Subscription_Recipe.user == user.id,
        Subscription_Recipe.created_at.between(
            fn.date_trunc('week', date.today()),
            date.today() + timedelta(days=1)))
    # Fix: original used datetime.date.today() here while using date.today()
    # above — only one of those spellings can be valid under this module's
    # imports; use the one the rest of the function already relies on.
    temp = Subscription_Recipe.select().where(
        Subscription_Recipe.user == user.id,
        Subscription_Recipe.created_at >= date.today(),
        Subscription_Recipe.is_checkedout == 0,
        Subscription_Recipe.recipe == recipe.id)
    if temp:
        return jsonify({"message": "Item is already in the cart"})
    if len(subscription_recipes) >= (user.subscription.amount_of_meals):
        return jsonify({
            "message":
            "You have reached the maximum amount of meals selected in a week"
        })
    new_subscription_recipe = Subscription_Recipe(
        user=user.id,
        subscription=user.subscription.id,
        recipe=recipe.id)
    new_subscription_recipe.save()
    # Fix: attach orders to the row just created instead of re-querying the
    # newest Subscription_Recipe once per ingredient (N+1), and save each
    # order exactly once (the original called order.save() twice).
    all_saved = True
    for ingredient in ingredients:
        order = Order(subscription_recipe=new_subscription_recipe.id,
                      ingredient=ingredient)
        all_saved = bool(order.save()) and all_saved
    if all_saved:
        return jsonify({"message": "Successfully added to cart"})
    return jsonify({"message": "Error occured"})
def week():
    """Return this week's not-yet-checked-out cart items for the current user."""
    user = User.get_or_none(User.id == get_jwt_identity())
    week_start = fn.date_trunc('week', date.today())
    tomorrow = date.today() + timedelta(days=1)
    cart_items = Subscription_Recipe.select().where(
        Subscription_Recipe.user == user.id,
        Subscription_Recipe.created_at.between(week_start, tomorrow),
        Subscription_Recipe.is_checkedout == 0)
    payload = []
    for item in cart_items:
        payload.append({
            "id": item.id,
            "user": item.user.id,
            "subscription": item.subscription.id,
            "recipe": item.recipe.id,
            "recipe_image_path":
            app.config.get("S3_LOCATION") + item.recipe.image_url,
            "recipe_name": item.recipe.recipe_name
        })
    return jsonify(payload)
def scrape():
    """Scrape per-kabupaten COVID-19 numbers for Bali and persist them.

    Returns cached Data rows when today's scrape already ran; otherwise
    parses the provincial dashboard table, upserts KabupatenKota/Data rows,
    and returns {"result": [row dicts]}.
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(Province.nama_prov == "Bali")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Bali", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # Fix: `Province.alias == prov` was passed as a second positional
    # argument to list(), which raised TypeError on every call; the bare
    # except swallowed it, so the daily cache never worked.
    try:
        result = list(
            Data.select().join(Province).where(
                (fn.date_trunc("day", Data.last_update) == sekarang)
                & (Province.alias == prov)))
    except Exception:
        result = []
    if len(result) > 0:
        return result
    link = "https://pendataan.baliprov.go.id/"
    output = {}
    output["result"] = []
    with requests.session() as s:
        r = s.get(link, verify=True)
        data = r.text
        url = soup(data, "lxml")
        con = url.find_all("div", attrs={"card-header"})
        title = con[6].find("h3").text
        pos = str(title).rfind("Dengan ")
        _last_update = str(title)[pos + 7:]
        table = url.find("table", attrs={"class": "table"})
        if table is not None:
            table_rows = table.find_all("tr")
            num_rows = len(table_rows)
            i = 0
            for tr in table_rows:
                td = tr.find_all("td")
                # Fix: the comprehension variable shadowed the outer `tr`.
                row = [cell.text.strip() for cell in td if cell.text.strip()]
                # Skip the header (first) and total (last) rows.
                if i >= 1 and i < num_rows - 1:
                    if row:
                        list_item = {}
                        list_item["provinsi"] = "Bali"
                        list_item["kode_kab_kota"] = "N/A"
                        list_item["kab_kota"] = row[0]
                        list_item["kecamatan"] = "N/A"
                        list_item["populasi"] = "N/A"
                        list_item["lat_kab_kota"] = "N/A"
                        list_item["long_kab_kota"] = "N/A"
                        list_item["n_odr"] = "N/A"
                        list_item["n_otg"] = "N/A"
                        list_item["n_odp"] = "N/A"
                        list_item["n_pdp"] = int(str(row[7]).rstrip())
                        list_item["n_confirm"] = int(str(row[6]).rstrip())
                        list_item["n_meninggal"] = int(str(row[9]).rstrip())
                        list_item["n_sembuh"] = int(str(row[8]).rstrip())
                        list_item["last_update"] = _last_update
                        kabkota = KabupatenKota.select().where(
                            KabupatenKota.prov_id == propinsi,
                            KabupatenKota.nama == row[0],
                        )
                        if kabkota.count() < 1:
                            kabkota = KabupatenKota.create(prov_id=propinsi,
                                                           nama=row[0])
                        else:
                            kabkota = kabkota.get()
                        # Only insert a Data row once per last_update stamp.
                        datum = Data.select().where(
                            Data.kabupaten == kabkota,
                            Data.last_update == dateparser.parse(_last_update),
                        )
                        if datum.count() < 1:
                            datum = Data.create(
                                kabupaten=kabkota,
                                n_pdp=int(str(row[7]).rstrip()),
                                n_confirm=int(str(row[6]).rstrip()),
                                n_meninggal=int(str(row[9]).rstrip()),
                                n_sembuh=int(str(row[8]).rstrip()),
                                last_update=dateparser.parse(_last_update),
                            )
                        output["result"].append(list_item)
                i = i + 1
    return output
def truncate_date(self, date_part, date_field):
    # Build a SQL date_trunc(<date_part>, <date_field>) expression whose
    # result is coerced through simple_date_time when read back.
    # NOTE(review): the python_value keyword on fn calls is a peewee 3
    # Function parameter — confirm the pinned peewee version supports it
    # (older versions require .python_value(...) as a chained call).
    return fn.date_trunc(date_part, date_field, python_value=simple_date_time)
def get(self):
    '''
    Analytics about how many likes/unlikes were made, aggregated by day.
    Example url: /api/analitics/?date_from=2020-02-02&date_to=2020-02-15
    :return: dict with period totals and per-day like/unlike counts
    '''
    data = parser_like_count.parse_args()
    app.logger.info(request)
    # Extend the date-only bounds to datetimes just past midnight.
    date_from = datetime.datetime.strptime(
        data['date_from'] + ' 00:00:00.000001', '%Y-%m-%d %H:%M:%S.%f')
    date_to = datetime.datetime.strptime(
        data['date_to'] + ' 00:00:00.000001', '%Y-%m-%d %H:%M:%S.%f')
    all_like = Like.select().where((Like.timestamp > date_from)
                                   & (Like.timestamp < date_to)).count()
    all_unlike = Unlike.select().where((Unlike.timestamp > date_from) & (
        Unlike.timestamp < date_to)).count()

    def _daily_counts(model):
        # Fix: the original built day labels from one query and counts from
        # a second, unordered GROUP BY query, then zipped them — the pairing
        # of label to count was not guaranteed. Selecting the day bucket
        # together with its count makes each pair come from the same row.
        day_bucket = fn.date_trunc('day', model.timestamp)
        rows = (model
                .select(day_bucket.alias('day'),
                        fn.count(model.id).alias('count'))
                .where((model.timestamp > date_from)
                       & (model.timestamp < date_to))
                .group_by(day_bucket)
                .order_by(day_bucket)
                .tuples())
        return {day.strftime("%m/%d/%Y"): cnt for day, cnt in rows}

    agr_like = _daily_counts(Like)
    agr_unlike = _daily_counts(Unlike)
    app.logger.info('count likes by day')
    return {
        'all like from period': all_like,
        'all unlike from period': all_unlike,
        'count likes by day': agr_like,
        'count unlikes by day': agr_unlike,
        'code': 200
    }
def scrape():
    """Scrape per-kabupaten COVID-19 numbers for Kalimantan Selatan from the
    provincial JSON endpoint and persist them.
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(
        Province.nama_prov == "Kalimantan Selatan")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Kalimantan Selatan", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # Fix: `Province.alias == prov` was passed as a second positional
    # argument to list(), raising TypeError that the bare except swallowed,
    # so the daily cache never worked.
    try:
        result = list(
            Data.select().join(Province).where(
                (fn.date_trunc("day", Data.last_update) == sekarang)
                & (Province.alias == prov)))
    except Exception:
        result = []
    if len(result) > 0:
        return result
    link = "https://corona.kalselprov.go.id/cov_map"
    output = {}
    output["result"] = []
    with requests.session() as s:
        r = s.get(link, verify=True)
        data = r.text
        json_data = json.loads(data)
        for data in json_data:
            list_item = {}
            list_item["provinsi"] = "Kalimantan Selatan"
            list_item["kode_kab_kota"] = data["code"]
            list_item["kab_kota"] = data["name"]
            list_item["kecamatan"] = "N/A"
            list_item["populasi"] = "N/A"
            list_item["lat_kab_kota"] = "N/A"
            list_item["long_kab_kota"] = "N/A"
            list_item["n_odr"] = "N/A"
            list_item["n_otg"] = "N/A"
            list_item["n_odp"] = data["cov_odp_count"]
            list_item["n_pdp"] = data["cov_pdp_count"]
            list_item["n_confirm"] = data["cov_positive_count"]
            list_item["n_meninggal"] = data["cov_died_count"]
            list_item["n_sembuh"] = data["cov_recovered_count"]
            list_item["last_update"] = "N/A"
            kabkota = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == data["name"])
            if kabkota.count() < 1:
                kabkota = KabupatenKota.create(prov_id=propinsi,
                                               nama=data["name"],
                                               kode=data["code"])
            else:
                kabkota = kabkota.get()
            # NOTE(review): comparing last_update to the current timestamp
            # will essentially never match, so this inserts a new Data row
            # on every run — confirm whether a day-level comparison was
            # intended before changing persisted behavior.
            datum = Data.select().where(
                Data.kabupaten == kabkota,
                Data.last_update == datetime.datetime.now())
            if datum.count() < 1:
                datum = Data.create(
                    kabupaten=kabkota,
                    n_odp=data["cov_odp_count"],
                    n_pdp=data["cov_pdp_count"],
                    n_confirm=data["cov_positive_count"],
                    n_meninggal=data["cov_died_count"],
                    n_sembuh=data["cov_recovered_count"],
                    last_update=datetime.datetime.now(),
                )
            output["result"].append(list_item)
    return output
def process_timefields(self, binsize, timefield, timezone):
    # Build a date_trunc(<binsize>, t1."<timefield>" AT TIME ZONE '<tz>')
    # expression and register it both as a GROUP BY key and as a selected
    # column aliased back to the original field name.
    # NOTE(review): timefield/timezone are interpolated into raw SQL via R();
    # this is only safe if both values come from trusted, validated
    # configuration — confirm no user-controlled input reaches here.
    timefield_sql = R("t1.\"%s\" AT TIME ZONE '%s'" % (timefield, timezone))
    self.selector = fn.date_trunc(binsize, timefield_sql)
    self.group_by_fields.append(self.selector)
    self.selected_fields.append(self.selector.alias(timefield))
def scrape():
    """Scrape per-kecamatan COVID-19 numbers for DI Yogyakarta by driving a
    headless Chrome through the provincial postal-code lookup form.
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(
        Province.nama_prov == "Daerah Istimewa Yogyakarta"
    )
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Daerah Istimewa Yogyakarta",
                                   alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # Fix: `Province.alias == prov` was passed as a second positional
    # argument to list(), raising TypeError that the bare except swallowed,
    # so the daily cache never worked.
    try:
        result = list(
            Data.select()
            .join(Province)
            .where((fn.date_trunc("day", Data.last_update) == sekarang)
                   & (Province.alias == prov)))
    except Exception:
        result = []
    if len(result) > 0:
        return result
    # konfigurasi chromedriver
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--window-size=1420,1080")
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    browser = webdriver.Chrome(chrome_options=chrome_options)
    hidden = "/html/body/div[2]/div[2]/div/div/form/input[1]"
    kodepos = '//*[@id="fname"]'
    button = "/html/body/div[2]/div[2]/div/div/form/button"
    directory = Path().absolute()
    kodepos_df = pd.read_csv(
        str(directory) + "/data/Data_KodePos_Kecamatan_DIY.csv",
        delimiter=";")
    output = {}
    output["result"] = []
    # Fix: ensure the browser is shut down even if scraping raises.
    try:
        for index, row in kodepos_df.iterrows():
            # konfigurasi base URL
            link = "https://sebaran-covid19.jogjaprov.go.id/kodepos"
            browser.get(link)
            kode_pos = str(row["kode_pos"])
            e = browser.find_element_by_xpath(hidden).get_attribute("value")
            e = browser.find_element_by_xpath(kodepos)
            e.send_keys(kode_pos)
            e = browser.find_element_by_xpath(button)
            e.click()
            data = browser.page_source
            url = soup(data, "lxml")
            odp = url.find("b", {"id": "odp"})
            pdp = url.find("b", {"id": "pdp"})
            positif = url.find("b", {"id": "positif"})
            last_update_blok = url.find("div", {"class": "dataupdate"})
            populasi = url.find("b", {"id": "populasi"})
            if populasi is None:
                populasi = url.find("strong", {"id": "populasi"})
            # The update date lives in the unstyled <p> inside the block.
            for item in last_update_blok.contents:
                if item.name == "p":
                    if item.has_attr("style") == False:
                        _last_update = item.text.replace(
                            "Data Update ", "").rstrip()
            list_item = {}
            list_item["provinsi"] = "Daerah Istimewa Yogyakarta"
            list_item["kode_kab_kota"] = str(row["kode_wilayah"])
            list_item["kab_kota"] = str(row["kabupaten_kota"])
            list_item["kecamatan"] = str(row["nama_kecamatan"])
            list_item["populasi"] = str(populasi.text).rstrip()
            list_item["lat_kab_kota"] = "N/A"
            list_item["long_kab_kota"] = "N/A"
            list_item["n_odr"] = "N/A"
            list_item["n_otg"] = "N/A"
            list_item["n_odp"] = int(str(odp.text).rstrip())
            list_item["n_pdp"] = int(str(pdp.text).rstrip())
            list_item["n_confirm"] = int(str(positif.text).rstrip())
            list_item["n_meninggal"] = "N/A"
            list_item["n_sembuh"] = "N/A"
            list_item["last_update"] = _last_update
            kabkota = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == str(row["kabupaten_kota"]),
            )
            if kabkota.count() < 1:
                kabkota = KabupatenKota.create(
                    prov_id=propinsi,
                    nama=str(row["kabupaten_kota"]),
                    kode=str(row["kode_wilayah"]),
                )
            else:
                kabkota = kabkota.get()
            # Only insert a Data row once per last_update stamp.
            datum = Data.select().where(
                Data.kabupaten == kabkota,
                Data.last_update == dateparser.parse(_last_update),
            )
            if datum.count() < 1:
                datum = Data.create(
                    kabupaten=kabkota,
                    n_odp=int(str(odp.text).rstrip()),
                    n_pdp=int(str(pdp.text).rstrip()),
                    n_confirm=int(str(positif.text).rstrip()),
                    last_update=dateparser.parse(_last_update),
                )
            output["result"].append(list_item)
    finally:
        browser.stop_client()
        browser.close()
        browser.quit()
    return output
def scrape():
    """Scrape per-kabupaten COVID-19 numbers for Aceh from the provincial
    JSON map endpoint and persist them.
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(Province.nama_prov == "Aceh")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Aceh", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # Fix: `Province.alias == prov` was passed as a second positional
    # argument to list(), raising TypeError that the bare except swallowed,
    # so the daily cache never worked.
    try:
        result = list(
            Data.select().join(Province).where(
                (fn.date_trunc("day", Data.last_update) == sekarang)
                & (Province.alias == prov)))
    except Exception:
        result = []
    if len(result) > 0:
        return result
    link = "https://covid.bravo.siat.web.id/json/peta"
    output = {}
    output["result"] = []
    with requests.session() as s:
        r = s.get(link, verify=True)
        data = r.text
        json_data = json.loads(data)
        for data in json_data:
            list_item = {}
            list_item["provinsi"] = "Aceh"
            list_item["kode_kab_kota"] = "N/A"
            list_item["kab_kota"] = data["namaKabupaten"]
            list_item["kecamatan"] = "N/A"
            list_item["populasi"] = "N/A"
            list_item["lat_kab_kota"] = data["latitude"]
            list_item["long_kab_kota"] = data["longitude"]
            list_item["n_odr"] = "N/A"
            list_item["n_otg"] = "N/A"
            list_item["n_odp"] = data["odp"]
            list_item["n_pdp"] = data["pdp"]
            list_item["n_confirm"] = data["positif"]
            list_item["n_meninggal"] = data["positifMeninggal"]
            list_item["n_sembuh"] = data["positifSembuh"]
            list_item["last_update"] = data["updateDate"]
            kabkota = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == data["namaKabupaten"],
            )
            if kabkota.count() < 1:
                kabkota = KabupatenKota.create(
                    prov_id=propinsi,
                    nama=data["namaKabupaten"],
                    lat=data["latitude"],
                    lon=data["longitude"],
                )
            else:
                kabkota = kabkota.get()
            # Only insert a Data row once per upstream updateDate.
            datum = Data.select().where(
                Data.kabupaten == kabkota,
                Data.last_update == dateparser.parse(data["updateDate"]),
            )
            if datum.count() < 1:
                datum = Data.create(
                    kabupaten=kabkota,
                    n_odp=data["odp"],
                    n_pdp=data["pdp"],
                    n_confirm=data["positif"],
                    n_meninggal=data["positifMeninggal"],
                    n_sembuh=data["positifSembuh"],
                    last_update=dateparser.parse(data["updateDate"]),
                )
            output["result"].append(list_item)
    return output
def scrape():
    """Scrape per-kabupaten COVID-19 numbers for Sulawesi Barat from the
    provincial dashboard table and persist them.
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(Province.nama_prov == "Sulawesi Barat")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Sulawesi Barat", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # Fix: `Province.alias == prov` was passed as a second positional
    # argument to list(), raising TypeError that the bare except swallowed,
    # so the daily cache never worked.
    try:
        result = list(
            Data.select().join(Province).where(
                (fn.date_trunc("day", Data.last_update) == sekarang)
                & (Province.alias == prov)))
    except Exception:
        result = []
    if len(result) > 0:
        return result
    link = "https://covid19.sulbarprov.go.id/utama/data"
    output = {}
    output["result"] = []
    with requests.session() as s:
        r = s.get(link, verify=True)
        data = r.text
        url = soup(data, "lxml")
        table = url.find("table", attrs={"class": "table-responsive"})
        if table is not None:
            table_rows = table.find_all("tr")
            num_rows = len(table_rows)
            i = 0
            for tr in table_rows:
                td = tr.find_all("td")
                # Fix: the comprehension variable shadowed the outer `tr`.
                row = [cell.text.strip() for cell in td if cell.text.strip()]
                # Skip the header (first) and total (last) rows.
                if i >= 1 and i < num_rows - 1:
                    if row:
                        list_item = {}
                        list_item["provinsi"] = "Sulawesi Barat"
                        list_item["kode_kab_kota"] = "N/A"
                        list_item["kab_kota"] = str(row[1]).rstrip()
                        list_item["kecamatan"] = "N/A"
                        list_item["populasi"] = "N/A"
                        list_item["lat_kab_kota"] = "N/A"
                        list_item["long_kab_kota"] = "N/A"
                        list_item["n_odr"] = "N/A"
                        list_item["n_otg"] = int(str(row[6]).rstrip())
                        list_item["n_odp"] = int(str(row[2]).rstrip())
                        list_item["n_pdp"] = int(str(row[10]).rstrip())
                        list_item["n_confirm"] = int(str(row[14]).rstrip())
                        # Deaths are summed across the ODP/PDP/positive
                        # death columns of the dashboard table.
                        list_item["n_meninggal"] = (
                            int(str(row[5]).rstrip()) +
                            int(str(row[9]).rstrip()) +
                            int(str(row[12]).rstrip()) +
                            int(str(row[18]).rstrip()))
                        list_item["n_sembuh"] = int(str(row[17]).rstrip())
                        list_item["last_update"] = "N/A"
                        kabkota = KabupatenKota.select().where(
                            KabupatenKota.prov_id == propinsi,
                            KabupatenKota.nama == row[1],
                        )
                        if kabkota.count() < 1:
                            kabkota = KabupatenKota.create(prov_id=propinsi,
                                                           nama=row[1])
                        else:
                            kabkota = kabkota.get()
                        # NOTE(review): comparing last_update to the current
                        # timestamp essentially never matches, so a new Data
                        # row is inserted every run — confirm whether a
                        # day-level comparison was intended.
                        datum = Data.select().where(
                            Data.kabupaten == kabkota,
                            Data.last_update == datetime.datetime.now(),
                        )
                        if datum.count() < 1:
                            datum = Data.create(
                                kabupaten=kabkota,
                                n_otg=int(str(row[6]).rstrip()),
                                n_odp=int(str(row[2]).rstrip()),
                                n_pdp=int(str(row[10]).rstrip()),
                                n_confirm=int(str(row[14]).rstrip()),
                                n_meninggal=int(str(row[5]).rstrip()) +
                                int(str(row[9]).rstrip()) +
                                int(str(row[12]).rstrip()) +
                                int(str(row[18]).rstrip()),
                                n_sembuh=int(str(row[17]).rstrip()),
                                last_update=datetime.datetime.now(),
                            )
                        output["result"].append(list_item)
                i = i + 1
    return output
def scrape():
    """Scrape per-kabupaten COVID-19 numbers for Jawa Tengah from the
    provincial data page (HTML table parsed via pandas) and persist them.
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(Province.nama_prov == "Jawa Tengah")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Jawa Tengah", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # Fix: `Province.alias == prov` was passed as a second positional
    # argument to list(), raising TypeError that the bare except swallowed,
    # so the daily cache never worked.
    try:
        result = list(
            Data.select().join(Province).where(
                (fn.date_trunc("day", Data.last_update) == sekarang)
                & (Province.alias == prov)))
    except Exception:
        result = []
    if len(result) > 0:
        return result
    link = "https://corona.jatengprov.go.id/data"
    output = {}
    output["result"] = []
    with requests.session() as s:
        r = s.get(link, verify=True)
        tree = html.fromstring(r.text)
        _last_update = tree.xpath(
            "//section[5]/div/div/div[1]/div/p/text()")[0].strip()
        table_elem = tree.xpath("//section[5]/div/div/div[2]/div/div/table")[0]
        table_str = etree.tostring(table_elem)
        if table_str is not None and table_str != "":
            df = pd.read_html(table_str)[0]
            # Total positives = recovered + deceased + hospitalized.
            df["positif"] = df["Positif: Sembuh"] + df[
                "Positif: Meninggal"] + df["Positif: Dirawat"]
            table = df.to_dict("records")
            for row in table:
                list_item = {}
                list_item["provinsi"] = "Jawa Tengah"
                list_item["kode_kab_kota"] = "N/A"
                list_item["kab_kota"] = row["Kabupaten/Kota"]
                list_item["kecamatan"] = "N/A"
                list_item["populasi"] = "N/A"
                list_item["lat_kab_kota"] = "N/A"
                list_item["long_kab_kota"] = "N/A"
                list_item["n_odr"] = "N/A"
                list_item["n_otg"] = "N/A"
                list_item["n_odp"] = row["ODP: Proses"]
                list_item["n_pdp"] = row["PDP: Dirawat"]
                list_item["n_confirm"] = row["positif"]
                list_item["n_meninggal"] = row["Positif: Meninggal"]
                list_item["n_sembuh"] = row["Positif: Sembuh"]
                list_item["last_update"] = _last_update
                kabkota = KabupatenKota.select().where(
                    KabupatenKota.prov_id == propinsi,
                    KabupatenKota.nama == list_item["kab_kota"],
                )
                if kabkota.count() < 1:
                    kabkota = KabupatenKota.create(
                        prov_id=propinsi, nama=list_item["kab_kota"])
                else:
                    kabkota = kabkota.get()
                # Only insert a Data row once per last_update stamp.
                datum = Data.select().where(
                    Data.kabupaten == kabkota,
                    Data.last_update == dateparser.parse(_last_update),
                )
                if datum.count() < 1:
                    datum = Data.create(
                        kabupaten=kabkota,
                        n_pdp=list_item["n_pdp"],
                        n_confirm=list_item["n_confirm"],
                        n_meninggal=list_item["n_meninggal"],
                        n_sembuh=list_item["n_sembuh"],
                        last_update=dateparser.parse(_last_update),
                    )
                output["result"].append(list_item)
    return output
def scrape():
    """Scrape per-kabupaten COVID-19 numbers for Nusa Tenggara Barat from the
    provincial list-data page and persist them.
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(
        Province.nama_prov == "Nusa Tenggara Barat")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Nusa Tenggara Barat", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # Fix: `Province.alias == prov` was passed as a second positional
    # argument to list(), raising TypeError that the bare except swallowed,
    # so the daily cache never worked.
    try:
        result = list(
            Data.select().join(Province).where(
                (fn.date_trunc("day", Data.last_update) == sekarang)
                & (Province.alias == prov)))
    except Exception:
        result = []
    if len(result) > 0:
        return result
    headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "en-US,en;q=0.9",
        "cache-control": "max-age=0",
        "cookie": "XSRF-TOKEN=eyJpdiI6IjJGSjNJWmxJS1AzNExHQ1poVDZPeWc9PSIsInZhbHVlIjoiTVEvWTVSWHZJSUtyY0RaalFPa2tzZW1hWmJYN0ZucGtEMXFtNGRRN3RLQXloVkxwNC90VEZMZHozYk1kV1cvLyIsIm1hYyI6ImVlM2NjOTg4YTA2YzMxZjllZGE3MGM0Njk1YTJmZGU1Nzc3ZGE4MmM1MWRlNTg4YWFjZWQ4MWQxZmUzMzkyNzEifQ%3D%3D; laravel_session=eyJpdiI6InN3a2JkdGJPcWMvNmVxbmxBZGxCK2c9PSIsInZhbHVlIjoiM1dwZmdmUHdNY3RwWG9oVXJqM2dYQmZSWnlEakY3TkVNZ2Mra21RY3hLN3V0UGMwQWxVbzhSbU5NNjR0aHdyeiIsIm1hYyI6ImQxNzYyMWI2MjhkMDRlYTY1Mjc4NDFhMTRkMzZiNDliNjdkY2NiNDkxZTY1NTRjZTIxZGVjZGE1YjkzZmUyZWYifQ%3D%3D",
        "referer": "https://corona.ntbprov.go.id/",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "same-origin",
        "sec-fetch-user": "******",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
    }
    link = "https://corona.ntbprov.go.id/list-data"
    output = {}
    output["result"] = []
    with requests.session() as s:
        r = s.get(link, verify=False, headers=headers)
        data = r.text
        url = soup(data, "lxml")
        table = url.find("table",
                         attrs={"class": "table table-bordered table-striped"})
        if table is not None:
            th = table.find("th")
            info_date = th.text.replace("\n", "").replace(" ", "")
            pos_l = info_date.find(",")
            pos_r = info_date.rfind("Pukul")
            _last_update = info_date[pos_l + 1:pos_r]
            table_rows = table.find_all("tr")
            num_rows = len(table_rows)
            i = 0
            for tr in table_rows:
                td = tr.find_all("td")
                # Fix: the comprehension variable shadowed the outer `tr`.
                row = [cell.text.strip() for cell in td if cell.text.strip()]
                # Skip the header (first) and total (last) rows.
                if i >= 1 and i < num_rows - 1:
                    if row:
                        nama_kab = str(row[0]).replace("\n", "").replace(
                            " ", " ")
                        list_item = {}
                        list_item["provinsi"] = "Nusa Tenggara Barat"
                        list_item["kode_kab_kota"] = "N/A"
                        list_item["kab_kota"] = nama_kab
                        list_item["kecamatan"] = "N/A"
                        list_item["populasi"] = "N/A"
                        list_item["lat_kab_kota"] = "N/A"
                        list_item["long_kab_kota"] = "N/A"
                        list_item["n_odr"] = "N/A"
                        list_item["n_otg"] = int(str(row[5]).rstrip())
                        list_item["n_odp"] = int(str(row[8]).rstrip())
                        list_item["n_pdp"] = int(str(row[11]).rstrip())
                        list_item["n_confirm"] = int(str(row[14]).rstrip())
                        list_item["n_meninggal"] = int(str(row[16]).rstrip())
                        list_item["n_sembuh"] = int(str(row[17]).rstrip())
                        list_item["last_update"] = _last_update
                        # Fix: the lookup originally matched on row[1] while
                        # rows are created with the cleaned row[0] name, so
                        # the duplicate check could never match and every run
                        # created new KabupatenKota rows.
                        kabkota = KabupatenKota.select().where(
                            KabupatenKota.prov_id == propinsi,
                            KabupatenKota.nama == nama_kab,
                        )
                        if kabkota.count() < 1:
                            kabkota = KabupatenKota.create(
                                prov_id=propinsi,
                                nama=nama_kab,
                            )
                        else:
                            kabkota = kabkota.get()
                        # Only insert a Data row once per last_update stamp.
                        datum = Data.select().where(
                            Data.kabupaten == kabkota,
                            Data.last_update == dateparser.parse(_last_update),
                        )
                        if datum.count() < 1:
                            datum = Data.create(
                                kabupaten=kabkota,
                                n_otg=int(str(row[5]).rstrip()),
                                n_odp=int(str(row[8]).rstrip()),
                                n_pdp=int(str(row[11]).rstrip()),
                                n_confirm=int(str(row[14]).rstrip()),
                                n_meninggal=int(str(row[16]).rstrip()),
                                n_sembuh=int(str(row[17]).rstrip()),
                                last_update=dateparser.parse(_last_update),
                            )
                        output["result"].append(list_item)
                i = i + 1
    return output
def scrape():
    """Scrape per-kabupaten COVID-19 numbers for Sulawesi Selatan from the
    provincial landing page (table is hidden inside HTML comments).
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(Province.nama_prov == "Sulawesi Selatan")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Sulawesi Selatan", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # Fix: `Province.alias == prov` was passed as a second positional
    # argument to list(), raising TypeError that the bare except swallowed,
    # so the daily cache never worked.
    try:
        result = list(
            Data.select()
            .join(Province)
            .where((fn.date_trunc("day", Data.last_update) == sekarang)
                   & (Province.alias == prov)))
    except Exception:
        result = []
    if len(result) > 0:
        return result
    link = "https://covid19.sulselprov.go.id"
    output = {}
    output["result"] = []
    with requests.session() as s:
        r = s.get(link, verify=False)
        data = r.text
        # The data table is wrapped in HTML comments; strip the markers so
        # the parser sees it.
        data = re.sub(r"<!--", "", data)
        data = re.sub(r"-->", "", data)
        url = soup(data, "lxml")
        title = url.find("h4", attrs={"class": "text-danger"}).text
        pos = str(title).rfind("-")
        _last_update = str(title)[pos + 1:]
        table = url.find("table", attrs={"class": "table table-striped"})
        if table is not None:
            table_rows = table.find_all("tr")
            num_rows = len(table_rows)
            i = 0
            for tr in table_rows:
                td = tr.find_all("td")
                # Fix: the comprehension variable shadowed the outer `tr`.
                row = [cell.text.strip() for cell in td if cell.text.strip()]
                # Skip the header (first) and total (last) rows.
                if i >= 1 and i < num_rows - 1:
                    list_item = {}
                    list_item["provinsi"] = "Sulawesi Selatan"
                    list_item["kode_kab_kota"] = "N/A"
                    list_item["kab_kota"] = row[1]
                    list_item["kecamatan"] = "N/A"
                    list_item["populasi"] = "N/A"
                    list_item["lat_kab_kota"] = "N/A"
                    list_item["long_kab_kota"] = "N/A"
                    list_item["n_odr"] = "N/A"
                    list_item["n_otg"] = "N/A"
                    list_item["n_odp"] = int(str(row[2]).rstrip())
                    list_item["n_pdp"] = int(str(row[3]).rstrip())
                    list_item["n_confirm"] = int(str(row[4]).rstrip())
                    list_item["n_meninggal"] = "N/A"
                    list_item["n_sembuh"] = "N/A"
                    list_item["last_update"] = _last_update
                    output["result"].append(list_item)
                    kabkota = KabupatenKota.select().where(
                        KabupatenKota.prov_id == propinsi,
                        KabupatenKota.nama == row[1]
                    )
                    if kabkota.count() < 1:
                        kabkota = KabupatenKota.create(prov_id=propinsi,
                                                       nama=row[1])
                    else:
                        kabkota = kabkota.get()
                    # Only insert a Data row once per last_update stamp.
                    datum = Data.select().where(
                        Data.kabupaten == kabkota,
                        Data.last_update == dateparser.parse(_last_update),
                    )
                    if datum.count() < 1:
                        datum = Data.create(
                            kabupaten=kabkota,
                            n_odp=int(str(row[2]).rstrip()),
                            n_pdp=int(str(row[3]).rstrip()),
                            n_confirm=int(str(row[4]).rstrip()),
                            last_update=dateparser.parse(_last_update),
                        )
                i = i + 1
    return output
def scrape():
    """Scrape per-kabupaten COVID-19 numbers for Banten from the chart data
    embedded in an inline <script> (pieSeries.data) on the provincial site.
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(Province.nama_prov == "Banten")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Banten", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # Fix: `Province.alias == prov` was passed as a second positional
    # argument to list(), raising TypeError that the bare except swallowed,
    # so the daily cache never worked.
    try:
        result = list(
            Data.select().join(Province).where(
                (fn.date_trunc("day", Data.last_update) == sekarang)
                & (Province.alias == prov)))
    except Exception:
        result = []
    if len(result) > 0:
        return result
    link = "https://infocorona.bantenprov.go.id/"
    output = {}
    output["result"] = []
    with requests.session() as s:
        r = s.get(link, verify=True)
        data = r.text
        url = soup(data, "lxml")
        script = url.find_all("script")
        json_data = ""
        # Locate the inline script assigning pieSeries.data and parse it.
        for item in script:
            if re.search(r"pieSeries.data\s\=\s(.*)\;", str(item)):
                var_data = re.findall(r"pieSeries.data\s\=\s(.*)\;", str(item))
                json_data = json.loads(str(var_data[0]))
        for data in json_data:
            list_item = {}
            list_item["provinsi"] = "Banten"
            list_item["kode_kab_kota"] = "N/A"
            list_item["kab_kota"] = data["title"]
            list_item["kecamatan"] = "N/A"
            list_item["populasi"] = "N/A"
            list_item["lat_kab_kota"] = data["latitude"]
            list_item["long_kab_kota"] = data["longitude"]
            list_item["n_odr"] = "N/A"
            list_item["n_otg"] = "N/A"
            # pieData slices: [0]=ODP, [1]=PDP, [2]=confirmed.
            list_item["n_odp"] = data["pieData"][0]["value"]
            list_item["n_pdp"] = data["pieData"][1]["value"]
            list_item["n_confirm"] = data["pieData"][2]["value"]
            list_item["n_meninggal"] = "N/A"
            list_item["n_sembuh"] = "N/A"
            list_item["last_update"] = "N/A"
            kabkota = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == data["title"])
            if kabkota.count() < 1:
                kabkota = KabupatenKota.create(
                    prov_id=propinsi,
                    nama=data["title"],
                    lat=data["latitude"],
                    lon=data["longitude"],
                    populasi="",
                )
            else:
                kabkota = kabkota.get()
            # NOTE(review): comparing last_update to the current timestamp
            # essentially never matches, so a new Data row is inserted on
            # every run — confirm whether a day-level comparison was
            # intended before changing persisted behavior.
            datum = Data.select().where(
                Data.kabupaten == kabkota,
                Data.last_update == datetime.datetime.now())
            if datum.count() < 1:
                datum = Data.create(
                    kabupaten=kabkota,
                    n_odp=data["pieData"][0]["value"],
                    n_pdp=data["pieData"][1]["value"],
                    n_confirm=data["pieData"][2]["value"],
                    last_update=datetime.datetime.now(),
                )
            output["result"].append(list_item)
    return output