Example no. 1
def sync_stats():
    """Fetch latest stats from IEDCR website"""
    try:
        logger = Logger.create_logger("tasks")
        logger.info("Starting sync of stats data")
        if Meta.is_stats_syncing():
            logger.info("A stats sync is already in progress")
            return

        Meta.set_stats_syncing(True)

        provider = DataProvider()
        data = provider.get_stats()

        stat = Stat.get()

        # iteratively update the data
        for attr, value in data.items():
            setattr(stat, attr, value)

        stat.save()
        Meta.set_stats_syncing(False)
        logger.info("Stats sync complete")
    except Exception as e:
        Meta.set_stats_syncing(False)
        logger.error(f"Stats sync failed with error: {e}")
Example no. 2
class BaseModel(db.Model):
    __abstract__ = True
    logger = Logger.create_logger(__name__)

    def save(self):
        """save the item to database"""
        try:
            db.session.add(self)
            db.session.commit()
        except Exception as e:
            self.logger.error(f"Error while saving to database: {e}")

    def delete(self):
        """delete the item from database"""
        try:
            db.session.delete(self)
            db.session.commit()
        except Exception as e:
            self.logger.error(f"Error while deleting from database: {e}")
Example no. 3
def sync_district_data():
    """Fetch latest data from IEDCR reports"""
    try:
        # For some unknown reason, Logger.create_logger(__name__),
        # where __name__ == "application.tasks", doesn't bind
        # the handler. After some debugging, I found that any name
        # prefixed with "application." doesn't work: Logger.create_logger()
        # assumes a handler is already bound, although it isn't.

        # The rest of the codebase doesn't have this problem. For example,
        # when the logger is created inside the DataProvider module, the
        # "application.provider.*" names work fine.

        # This is a weird issue that I will look into later. For now,
        # the logger is simply named "tasks".
        logger = Logger.create_logger("tasks")
        logger.info("Starting sync of district data")
        if Meta.is_district_syncing():
            logger.info("A district sync is already in progress")
            return

        # set updating state to true
        Meta.set_district_syncing(True)

        # download and get updated data
        provider = DataProvider()
        new_data = provider.sync_district_data()  # returns a list of (districtName, count) tuples
        last_updated = Meta.get_last_district_sync()

        # flag to monitor if fetched data has changed
        has_updated = False

        # difference between the current time and the last updated time
        update_delta = datetime.utcnow() - last_updated

        # check the data against database records and update as necessary
        for pair in new_data:
            # ignore blank data
            if pair[0] == "" or pair[1] == "":
                continue

            district = District.find_by_name(pair[0])
            if district:

                if district.count != pair[1]:
                    # count changed from last record
                    # - save previous count
                    # - update new count
                    district.prev_count = district.count
                    district.count = pair[1]
                    has_updated = True
                else:
                    # count did not change
                    # - make count and prev_count same only if last change was 1 day ago
                    if update_delta.days >= 1:
                        district.prev_count = district.count

                district.save()
            else:
                new_district = District(pair[0], pair[1])
                new_district.save()
                has_updated = True

        # set updating state to False as update is finished
        Meta.set_district_syncing(False)

        logger.debug(f"Has updated = {has_updated}")
        if has_updated:
            # update the last sync time only if the previous one is more than 24hrs old;
            # the fixed 24hr window keeps the count - prev_count difference meaningful
            if update_delta.days >= 1:
                Meta.set_last_district_sync()
                logger.info("Updated last sync time")
            logger.info("District sync complete (fetched new data)")
            return
        logger.info("District sync complete (already up-to-date)")
    except Exception as e:
        Meta.set_district_syncing(False)
        logger.error(f"District sync failed with error: {e}")
Example no. 4
class DataProvider:
    logger = Logger.create_logger(__name__)

    def __init__(self):
        self.stats_data_source = "https://corona.gov.bd/lang/en"
        self.district_report_url = os.environ.get("REPORT_URL")
        self.trans_table = str.maketrans("০১২৩৪৫৬৭৮৯", "0123456789")

    def get_stats(self):
        """Fetch the latest statistics like total positive cases, deaths etc"""
        page = requests.get(self.stats_data_source)
        soup = bs(page.content, "html.parser")
        counts = soup.select(".live-update-box-wrap-h1>b")

        # process counts - convert the scraped text values to integers
        for i in range(len(counts)):
            # counts[i] = counts[i].text.translate(self.trans_table)  # Bangla-to-English digit translation, currently unused
            counts[i] = int(counts[i].text)

        data_dict = {
            "positive_24": counts[0],
            "positive_total": counts[1],
            "death_24": counts[2],
            "death_total": counts[3],
            "recovered_24": counts[4],
            "recovered_total": counts[5],
            "test_24": counts[6],
            "test_total": counts[7],
        }

        self.logger.debug(data_dict)
        return data_dict

    def parse_district_data(self):
        """Parse the Google Sheets to get district data"""
        page = requests.get(self.district_report_url)
        soup = bs(page.content, "html.parser")
        table = soup.find("table")
        rows = table.find_all("tr")
        result = []

        for rindex, row in enumerate(rows):
            # ignore first two rows and last row because they are headers/totals
            if rindex < 2 or rindex == len(rows) - 1:
                continue

            data = []
            for col in row.find_all("td"):
                # ignore division names column
                if col.has_attr("rowspan"):
                    continue
                data.append(self.sanitize(col.text))

            result.append(data)

        return result

    def sanitize(self, s):
        """sanitize string:
        - by replacing invalid chars with correct ones
        - converting to int if applicable"""
        mapping = {"’": "'"}
        for key, val in mapping.items():
            s = s.replace(key, val)

        if s.isdigit():
            s = int(s)
        return s

    def sync_district_data(self):
        return self.parse_district_data()
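For reference, the trans_table built in __init__ maps Bangla digits to their ASCII counterparts with str.maketrans, and the commented-out line in get_stats() would apply it via str.translate. A standalone sketch (not part of the project) of how that table behaves:

# standalone demonstration of the Bangla-to-ASCII digit table used by DataProvider
trans_table = str.maketrans("০১২৩৪৫৬৭৮৯", "0123456789")

raw = "১২৩৪"  # a count as it appears on the Bangla page
print(raw.translate(trans_table))       # "1234"
print(int(raw.translate(trans_table)))  # 1234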