Example #1
    def _get_regions_data(self):
        # {"data":[[["Aglonas novads",0,"0","56.0965 27.114","Aglonas novads"],
        out = DataPointMerger()
        base_dir = self.get_path_in_dir('')

        for date in self.iter_nonempty_dirs(base_dir):
            r = self.sdpf()
            path = f'{base_dir}/{date}/regions_data.json'
            print(path)
            with open(path, 'r', encoding='utf-8') as f:
                data = f.read()
                if '<!DOCTYPE HTML>' in data:
                    continue  # TODO: add age-group data etc. from the new page
                data = json.loads(data)

            for i_data in data['data']:
                for region_name, value, *leftover in i_data:
                    print(region_name)

                    # Only confirmed and deaths are shown in the dashboard
                    date = datetime.datetime.fromtimestamp(
                        data['refreshed'] / 1000.0).strftime('%Y_%m_%d')

                    if value is not None:
                        r.append(region_schema=Schemas.ADMIN_1,
                                 region_parent='LV',
                                 region_child=region_name,
                                 datatype=DataTypes.TOTAL,
                                 value=int(value),
                                 date_updated=date,
                                 source_url=self.SOURCE_URL)

            out.extend(r)
        return out
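The date handling above hinges on `data['refreshed']` being a Unix epoch in milliseconds (hence the division by 1000.0). A minimal sketch of that conversion under that assumption (`epoch_ms_to_dirname` is a hypothetical helper name):

import datetime

def epoch_ms_to_dirname(ms):
    # Epoch milliseconds -> 'YYYY_MM_DD' directory-style date string;
    # the exact date depends on the local timezone.
    return datetime.datetime.fromtimestamp(ms / 1000.0).strftime('%Y_%m_%d')

# epoch_ms_to_dirname(1600000000000) -> '2020_09_13' (in UTC)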
Example #2
def get_vic_carto_datapoints():
    # [{"postcode": 3006, "_new": 0, "activedisp": "Five or fewer active cases",
    # "cases": 65, "ratedisp": 13, "population": 18811},
    #
    # {"rows":[{"cartodb_id":287,
    # "the_geom":"0101000020E6100000386744696F226240E10B93A982E942C0",
    # "the_geom_webmercator":"0101000020110F00008D3881B2A4CD6E41295C51BCE25F51C1",
    # "postcode":3126,"affected":0,"band":"None","lat":-37.8243,"lon":145.0761,
    # "suburbs":"Camberwell East, Canterbury","active":0,"rate":0,"total":2},

    # The target directory date only rolls over to "today" after 20:30 local time
    date = (datetime.now() -
            timedelta(hours=20, minutes=30)).strftime('%Y_%m_%d')
    dir_ = get_data_dir() / 'vic' / 'newmap_postcode' / date
    if not exists(dir_):
        makedirs(dir_)

    postcode_json_path = dir_ / 'postcode.json'
    if not exists(postcode_json_path):
        urlretrieve(
            "https://dhhs.carto.com:443/api/v2/sql?q=select%20*%20from%20public.covid19_postcodes",
            postcode_json_path)

    r = DataPointMerger()
    dates = sorted(listdir(get_data_dir() / 'vic' / 'newmap_postcode'))
    if date not in dates:
        dates.append(date)

    for i_date in dates:
        path = get_data_dir(
        ) / 'vic' / 'newmap_postcode' / i_date / 'postcode.json'
        r.extend(_get_datapoints(i_date, path))
    return r
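The Carto SQL endpoint above is called with a hand-escaped query string. A sketch of building the same URL with urllib.parse.urlencode instead, using the endpoint and query from the example (the helper name is illustrative):

from urllib.parse import urlencode

CARTO_SQL_ENDPOINT = 'https://dhhs.carto.com/api/v2/sql'

def carto_sql_url(query):
    # urlencode handles the percent/plus escaping that the example inlines by hand
    return f'{CARTO_SQL_ENDPOINT}?{urlencode({"q": query})}'

# carto_sql_url('select * from public.covid19_postcodes')
# -> 'https://dhhs.carto.com/api/v2/sql?q=select+%2A+from+public.covid19_postcodes'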
Example #3
    def __get_tests_datapoints(self, SOURCE_URL, path_tests):
        r = DataPointMerger()

        with open(path_tests, 'r', encoding='utf-8') as f:
            for item in json.loads(f.read())['data']:
                try:
                    item['POA_NAME16'] = str(int(float(item['POA_NAME16'])))
                except ValueError:
                    pass

                date = self.__get_partial_date(path_tests, item['Date'])
                number = int(item['Number'])
                # recent = item['Recent']  # TODO: add this
                postcode = item['POA_NAME16'] if item['POA_NAME16'] else 'Unknown'

                r.append(DataPoint(
                    region_schema=Schemas.POSTCODE,
                    region_parent='AU-NSW',
                    region_child=postcode,
                    datatype=DataTypes.TESTS_TOTAL,
                    value=number,
                    date_updated=date,
                    source_url=SOURCE_URL,
                    source_id=self.SOURCE_ID
                ))

        return r
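The str(int(float(...))) dance at the top of the loop normalises postcode fields that arrive as floats ('2000.0' becomes '2000') while the ValueError fallback leaves non-numeric markers untouched. In isolation (`normalise_postcode` is a hypothetical name):

def normalise_postcode(raw):
    try:
        return str(int(float(raw)))
    except ValueError:
        return raw  # e.g. '' or a non-numeric placeholder

assert normalise_postcode('2000.0') == '2000'
assert normalise_postcode('Unknown') == 'Unknown'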
Example #4
    def get_datapoints(self):
        r = DataPointMerger()
        totals = Counter()
        added_totals = defaultdict(set)
        tests = Counter()
        added_tests = defaultdict(set)

        for date in sorted(listdir(get_overseas_dir() / 'fr' / 'esridata')):
            r.extend(self._get_positive_by_department(date, totals, added_totals, tests, added_tests))
        return r
Example #5
    def get_datapoints(self):
        ua = URLArchiver('sa/dashboard')

        i_r = DataPointMerger()
        for period in ua.iter_periods():
            for subperiod_id, subdir in ua.iter_paths_for_period(period):
                path = ua.get_path(subdir)
                with open(path, 'r', encoding='utf-8') as f:
                    data = json.loads(f.read())
                i_r.extend(self._get_from_json(data))
        return i_r
Example #6
    def get_datapoints(self):
        # {
        #   "regions-history": [
        #     {
        #       "date": "2020-04-20",
        #       "regions": [
        #         {
        #           "cases_per_100000_people": 11.88,
        #           "population": 606170,
        #           "region_cases": 72,
        #           "region_en_name": "East Macedonia and Thrace",
        #           "region_gr_name": "Ανατολική Μακεδονία και Θράκη"
        #         },

        out = DataPointMerger()
        base_dir = self.get_path_in_dir('')

        region_map = {
            'west greece': 'GR-G',
            'central greece': 'GR-H',
            'north aegean': 'GR-K',
            'west macedonia': 'GR-C',
            'without permanent residency in greece': 'Other',
            'under investigation': 'Unknown',
        }

        for date in self.iter_nonempty_dirs(base_dir):
            if date >= '2020_10_10':
                continue  # NOTE: this source stopped updating around 2020-10-10

            r = self.sdpf()
            path = f'{base_dir}/{date}/regions.json'
            with open(path, 'r', encoding='utf-8') as f:
                data = json.loads(f.read())

            for day_dict in data['regions-history']:
                date = self.convert_date(day_dict['date'])

                for region_dict in day_dict['regions']:
                    r.append(region_schema=Schemas.ADMIN_1,
                             region_parent='GR',
                             region_child=region_map.get(
                                 region_dict['region_en_name'].lower(),
                                 region_dict['region_en_name']),
                             datatype=DataTypes.TOTAL,
                             value=int(region_dict['region_cases']),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

            out.extend(r)
        return out
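The region_map.get(key, default) call above does double duty: known aliases map to ISO 3166-2 codes (or 'Other'/'Unknown'), and any region not in the map passes through under its original English name. A reduced sketch:

region_map = {'west greece': 'GR-G', 'north aegean': 'GR-K'}

def map_region(en_name):
    # Fall back to the name itself when there is no explicit mapping
    return region_map.get(en_name.lower(), en_name)

assert map_region('West Greece') == 'GR-G'
assert map_region('Attica') == 'Attica'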
Example #7
    def get_datapoints(self):
        r = []
        dpm = DataPointMerger()
        base_dir = self.get_path_in_dir('')
        for date in self.iter_nonempty_dirs(base_dir):
            r.extend(self._get_data_from_cases_csv(date, dpm))
        return r
Example #8
    def get_datapoints(self):
        date = (
            datetime.now() - timedelta(hours=20, minutes=30)
        ).strftime('%Y_%m_%d')

        dates = sorted(listdir(get_data_dir() / 'nsw' / 'open_data'))
        if date not in dates:
            dates.append(date)

        website_data = DataPointMerger()
        for i_date in dates:
            download = i_date == date
            for datapoint in self.__get_website_datapoints(i_date, download=download):
                website_data.append(datapoint)

        r = []
        r.extend(website_data)
        return r
Example #9
    def get_datapoints(self):
        r = []
        dpm = DataPointMerger()
        # [{"year":2020,"month":3,"day":4,"regions":{"mb":{"benedikt":{"activeCases":null,"confirmedToDate":null,"deceasedToDate":0},
        base_dir = self.get_path_in_dir('')

        for date in self.iter_nonempty_dirs(base_dir):
            print(date)
            r.extend(self._get_municipalities_data(date, dpm))
        return r
Example #10
    def get_datapoints(self):
        date = datetime.now() - timedelta(hours=20, minutes=30)
        date = date.strftime('%Y_%m_%d')

        dates = sorted(listdir(get_data_dir() / 'nsw' / 'open_data'))
        if date not in dates:
            dates.append(date)

        open_data = DataPointMerger()  # source_id=self.SOURCE_ID

        for i_date in dates:  # open_data.iter_unprocessed_dates(dates)
            download = i_date == date
            for datapoint in self.__get_open_datapoints(i_date,
                                                        download=download):
                open_data.append(datapoint)

        #open_data.save_state()

        r = []
        r.extend(open_data)
        return r
Example #11
    def get_datapoints(self):
        SA_DASH_JSON_URL = 'https://www.covid-19.sa.gov.au/__data/assets/' \
                           'file/0004/145849/covid_19_daily.json'
        ua = URLArchiver('sa/dashboard')
        ua.get_url_data(SA_DASH_JSON_URL, cache=False)

        r = []
        dpm = DataPointMerger()
        for sub_dir in sorted(listdir(SA_MAP_DIR)):
            joined_dir = f'{SA_MAP_DIR}/{sub_dir}'
            r.extend(self._get_data(joined_dir, dpm))
        return r
Example #12
    def _get_county_data(self):
        # ORIGID	CountyName	PopulationCensus16	IGEasting	IGNorthing	Lat	Long	UniqueGeographicIdentifier	ConfirmedCovidCases	PopulationProportionCovidCases	ConfirmedCovidDeaths	ConfirmedCovidRecovered	x	y	FID	TimeStampDate
        # 	Carlow	56932	278661	163444	52.7168	-6.8367	http://data.geohive.ie/resource/county/2ae19629-143d-13a3-e055-000000000001	175	307.384247874657			-6.8367	52.7168	194903	2020/07/01 00:00:00+00
        # 	Cavan	76176	246380	304501	53.9878	-7.2937	http://data.geohive.ie/resource/county/2ae19629-1448-13a3-e055-000000000001	862	1131.5900021004			-7.2937	53.9878	194904	2020/07/01 00:00:00+00
        # 	Clare	118817	133493	182732	52.8917	-8.9889	http://data.geohive.ie/resource/county/2ae19629-1450-13a3-e055-000000000001	368	309.719989563783			-8.9889	52.8917	194905	2020/07/01 00:00:00+00

        out = DataPointMerger()
        base_dir = self.get_path_in_dir('')

        for date in self.iter_nonempty_dirs(base_dir):
            r = self.sdpf()
            path = f'{base_dir}/{date}/county_data.csv'

            with open(path, 'r', encoding='utf-8') as f:
                for item in csv.DictReader(f):
                    date = self.convert_date(item['TimeStampDate'].split()[0])

                    for datatype, value in (
                        (DataTypes.TOTAL,
                         int(item['ConfirmedCovidCases'])),
                        (DataTypes.STATUS_DEATHS,
                         int(item['ConfirmedCovidDeaths'] or 0)),
                        (DataTypes.STATUS_RECOVERED,
                         int(item['ConfirmedCovidRecovered'] or 0)),
                        (DataTypes.STATUS_ACTIVE,
                         int(item['ConfirmedCovidCases'] or 0) -
                         int(item['ConfirmedCovidDeaths'] or 0) -
                         int(item['ConfirmedCovidRecovered'] or 0)),
                    ):
                        r.append(region_schema=Schemas.ADMIN_1,
                                 region_parent='IE',
                                 region_child=item['CountyName'],
                                 datatype=datatype,
                                 value=value,
                                 date_updated=date,
                                 source_url=self.SOURCE_URL)

            out.extend(r)
        return out
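csv.DictReader yields empty strings for blank cells, so a bare int() would raise ValueError; the int(x or 0) idiom used above treats '' (and None) as zero:

assert int('' or 0) == 0
assert int(None or 0) == 0
assert int('862' or 0) == 862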
Example #13
    def _get_recovered_sum(self):
        out = DataPointMerger()
        base_dir = self.get_path_in_dir('')

        for date in self.iter_nonempty_dirs(base_dir):
            r = self.sdpf()
            path = f'{base_dir}/{date}/tw_corona.html'
            print(path)
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    html = f.read()
            except UnicodeDecodeError:
                import brotli
                with open(path, 'rb') as f:
                    html = brotli.decompress(f.read()).decode('utf-8')

            new_data_template = '.geojson","series":[{'
            if new_data_template in html:
                data = '[{%s}]' % html.split(new_data_template)[-1].split(
                    '}],')[0]
            else:
                data = html.split('var jdata1 = ')[-1].split(
                    '\n')[0].strip().strip(';').replace("'", '"')

            for item in json.loads(data):
                # [{'code':'Taipei City', 'value':118}, ...]
                region = place_map[item['code'].strip().lower()]

                r.append(region_schema=Schemas.ADMIN_1,
                         region_parent='TW',
                         region_child=region,
                         datatype=DataTypes.TOTAL,
                         value=int(item['value']),
                         date_updated=date,
                         source_url=self.SOURCE_URL)

            out.extend(r)
        return out
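The try/except UnicodeDecodeError fallback above handles snapshots that were saved as raw brotli-compressed response bodies. The same read logic as a self-contained sketch (`read_maybe_brotli` is a hypothetical helper; requires the brotli package):

import brotli  # pip install brotli

def read_maybe_brotli(path):
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except UnicodeDecodeError:
        # Snapshot was stored still brotli-compressed; decompress manually
        with open(path, 'rb') as f:
            return brotli.decompress(f.read()).decode('utf-8')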
Example #14
    def _get_regions_data(self):
        out = DataPointMerger()
        base_dir = self.get_path_in_dir('')

        for date in self.iter_nonempty_dirs(base_dir):
            r = self.sdpf()
            path = f'{base_dir}/{date}/regions_data.json'
            with open(path, 'r') as f:
                data = json.loads(f.read())

            for feature in data['features']:
                attributes = feature['attributes']

                r.append(region_schema=Schemas.ADMIN_1,
                         region_parent='LV',
                         region_child=attributes['Nos_pilns'],
                         datatype=DataTypes.TOTAL,
                         value=int(attributes['Covid_sasl']),
                         date_updated=date,
                         source_url=self.SOURCE_URL)

            out.extend(r)
        return out
Example #15
    def __postcode_datapoints_to_lga(self, SOURCE_URL, postcode_to_lga, r, source_id):
        # Convert postcode to LGA where possible
        new_r = DataPointMerger()
        added_to_lga = set()
        processed_postcode = set()
        mapping = Counter()

        for datapoint in sorted(r, key=lambda i: i.date_updated):
            if datapoint.region_schema == Schemas.LGA:
                added_to_lga.add((
                    datapoint.region_child,
                    datapoint.datatype
                ))
                continue
            elif datapoint.region_schema != Schemas.POSTCODE:
                continue
            elif datapoint.region_child in postcode_to_lga:
                lga = postcode_to_lga[datapoint.region_child]
            else:
                lga = 'unknown'
                if datapoint.region_child != 'unknown':
                    print("NOT FOUND:", datapoint.region_child)
                # continue  # WARNING

            if (datapoint.region_child, datapoint.datatype, datapoint.date_updated) in processed_postcode:
                #print("IGNORING DOUBLE-UP:", datapoint)
                continue
            processed_postcode.add((datapoint.region_child, datapoint.datatype, datapoint.date_updated))

            #if lga == 'cumberland':
            #    print('USING:', datapoint)

            mapping[
                lga,
                datapoint.datatype,
                datapoint.date_updated
            ] += datapoint.value

        new_r.extend(r)

        for (lga, datatype, date_updated), value in mapping.items():
            if (lga, datatype) in added_to_lga:
                # Don't add to LGA if available using direct data!
                continue

            new_r.append(DataPoint(
                region_schema=Schemas.LGA,
                region_parent='AU-NSW',
                region_child=lga,
                datatype=datatype,
                value=value,
                date_updated=date_updated,
                source_url=SOURCE_URL,
                source_id=source_id
            ))

        return new_r
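The heart of the postcode-to-LGA conversion above is the Counter keyed by (lga, datatype, date): each postcode datapoint is summed into its LGA's bucket, with unmapped postcodes falling into 'unknown'. Reduced to a sketch with toy data:

from collections import Counter

postcode_to_lga = {'2145': 'cumberland', '2150': 'parramatta'}  # toy mapping
points = [  # (postcode, datatype, date, value) - toy datapoints
    ('2145', 'total', '2020_07_01', 5),
    ('2150', 'total', '2020_07_01', 3),
    ('9999', 'total', '2020_07_01', 1),  # unmapped -> 'unknown'
]

mapping = Counter()
for postcode, datatype, date, value in points:
    lga = postcode_to_lga.get(postcode, 'unknown')
    mapping[lga, datatype, date] += value

assert mapping['cumberland', 'total', '2020_07_01'] == 5
assert mapping['unknown', 'total', '2020_07_01'] == 1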
Example #16
    def _get_recovered_sum(self):
        out = DataPointMerger()
        base_dir = self.get_path_in_dir('')

        for date in self.iter_nonempty_dirs(base_dir):
            r = self.sdpf()
            path = f'{base_dir}/{date}/is_index.html'
            with open(path, 'rb') as f:
                data = f.read()
                data = data.decode('utf-8')

            # TODO: There are quite a few more stats!!

            regional_stats = data.split(
                '[[[null,{"font-weight":"700","value":"Infections"},'
                '{"font-weight":"700","value":"Quarantine"}],')[1].split(
                    ']]],')[0]
            #print(regional_stats)
            regional_stats = json.loads(f'[{regional_stats}]]')

            for region, infections_dict, quarantine_dict in regional_stats:
                region = place_map[region]

                r.append(
                    region_schema=Schemas.ADMIN_1,
                    region_parent='IS',
                    region_child=region,
                    datatype=DataTypes.TOTAL,
                    # This changed to be an int from a dict on 9 Jun
                    value=int(infections_dict['value']) if isinstance(
                        infections_dict, dict) else int(infections_dict),
                    date_updated=date,
                    source_url=self.SOURCE_URL)

            out.extend(r)
        return out
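Like Example #13, this parser pulls its data out by splitting the page on literal markers rather than using an HTML parser, since the payload lives inside inline JavaScript. A toy version of the split-and-json.loads approach, using Example #13's 'var jdata1 = ' marker:

import json

html = 'var jdata1 = [{"code":"Taipei City","value":118}];\nvar other = 1;'
payload = html.split('var jdata1 = ')[-1].split('\n')[0].strip().strip(';')
assert json.loads(payload)[0]['value'] == 118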
Example #17
    def get_datapoints(self):
        r = DataPointMerger()
        with set_locale('tr_TR.utf8'):
            for dir_ in sorted(listdir(self.output_dir)):
                with open(self.output_dir / dir_ / 'covid19_in_turkey.html', 'r', encoding='utf-8') as f:
                    html = f.read()

                r.extend(self._get_total_datapoints(html))
                r.extend(self._get_recovered_death_datapoints(html))
        return r
Example #18
    def get_datapoints(self):
        r = DataPointMerger()

        # Start Hyper
        with HyperProcess(
                telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:

            for date in sorted(listdir(self.output_dir)):
                self.__unzip_date(date)

                # active_cases/Data/dash-transmission/vic-details-transmissions-pub-extract.hyper
                # cases/Data/dash-charts/vic_detailed_prep Extract_daily-pubextract.hyper
                r.extend(self.__get_genderagegroup_datapoints(hyper, date))
                r.extend(self.__get_transmissions_datapoints(hyper, date))
                r.extend(
                    self.__get_transmissions_over_time_datapoints(hyper, date))

        return r
Example #19
    def get_datapoints(self):
        r = DataPointMerger()
        for date in r.iter_unprocessed_dates(
                sorted(listdir(get_data_dir() / 'vic' / 'csv_data'))):
            r.extend(self._get_postcode_datapoints(date))
            r.extend(self._get_lga_datapoints(date))

            #print(get_data_dir(), date)

            if (get_data_dir() / 'vic' / 'csv_data' / date /
                    'agegroup.csv').exists():
                r.extend(self._get_agegroup_datapoints(date))
            if (get_data_dir() / 'vic' / 'csv_data' / date /
                    'all_lga.csv').exists():
                r.extend(self._get_all_lga_datapoints(date))
            if (get_data_dir() / 'vic' / 'csv_data' / date /
                    'all_lga_acquired_source').exists():
                r.extend(self._get_all_lga_acquired_source_datapoints(date))
            if (get_data_dir() / 'vic' / 'csv_data' / date /
                    'all_acquired_source').exists():
                r.extend(self._get_all_acquired_source_datapoints(date))
        return r
Example #20
    def get_datapoints(self):
        r = []
        dpm = DataPointMerger()
        for date in self.iter_nonempty_dirs(self.output_dir):
            r.extend(self._get_recovered_sum(date, dpm))
        return r
Example #21
    def get_datapoints(self):
        r = DataPointMerger()
        for path in SA_TABLEAU_MAP_DIR.iterdir():
            r.extend(self._get_datapoints(path))
        return r
Example #22
    def get_datapoints(self):
        r = []
        dpm = DataPointMerger()
        for date in sorted(listdir(get_overseas_dir() / 'uk' / 'gov-api')):
            r.extend(self._get_datapoints(date, dpm))
        return r
Example #23
    def _get_positive_by_department(self, date,
                                    totals, added_totals,
                                    tests, added_tests):
        out = DataPointMerger()
        base_path = get_overseas_dir() / 'fr' / 'esridata' / date

        for fnam in sorted(listdir(base_path)):
            r = self.sdpf()
            with open(base_path / fnam, 'r', encoding='utf-8') as f:
                data = json.loads(f.read())
                print(base_path, fnam, data)

            for feature in data['features']:
                attributes = feature['attributes']
                print(feature)

                date = self.convert_date(attributes['Jour'])
                try:
                    region_child = 'FR-%02d' % int(attributes['CODE_DEPT'])
                except ValueError:
                    # e.g. 2A
                    region_child = 'FR-%s' % attributes['CODE_DEPT']

                for datatype, value in (
                    (DataTypes.STATUS_HOSPITALIZED, attributes['Hospitalisation_T']),
                    #(DataTypes.FIXME, attributes['Hospitalisation_H']),
                    #(DataTypes.FIXME, attributes['Hospitalisation_F']),
                    (DataTypes.STATUS_ICU, attributes['Reanimation_T']),
                    #(DataTypes.FIXME, attributes['Reanimation_H']),
                    #(DataTypes.FIXME, attributes['Reanimation_F']),
                    (DataTypes.STATUS_DEATHS, attributes['Deces_T']),
                    #(DataTypes.FIXME, attributes['Deces_H']),
                    #(DataTypes.FIXME, attributes['Deces_F']),
                    (DataTypes.NEW, attributes['Tests_Viro_P']),
                    #(DataTypes.TESTS_TOTAL, attributes['Tests_Viro_T'])  # FIXME: This is new tests!
                ):
                    if value is None:
                        continue

                    r.append(
                        region_schema=Schemas.ADMIN_1,
                        region_parent='FR',
                        region_child=region_child,
                        datatype=datatype,
                        value=value,
                        date_updated=date,
                        source_url=self.SOURCE_URL
                    )

                # Nbre_Cas_Confirmes appears to always be None
                assert attributes['Nbre_Cas_Confirmes'] is None, attributes

                if attributes['Tests_Viro_P'] and date not in added_totals[region_child]:
                    added_totals[region_child].add(date)
                    totals[region_child] += attributes['Tests_Viro_P']  # or attributes['Nbre_Cas_Confirmes']

                    r.append(
                        region_schema=Schemas.ADMIN_1,
                        region_parent='FR',
                        region_child=region_child,
                        datatype=DataTypes.TOTAL,
                        value=totals[region_child],
                        date_updated=date,
                        source_url=self.SOURCE_URL
                    )

                if attributes['Tests_Viro_T'] and date not in added_tests[region_child]:
                    added_tests[region_child].add(date)
                    tests[region_child] += attributes['Tests_Viro_T']

                    r.append(
                        region_schema=Schemas.ADMIN_1,
                        region_parent='FR',
                        region_child=region_child,
                        datatype=DataTypes.TESTS_TOTAL,
                        value=tests[region_child],
                        date_updated=date,
                        source_url=self.SOURCE_URL
                    )

            out.extend(r)
        return out
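The totals/added_totals bookkeeping above turns a per-day feed into running cumulative totals while guarding against double-counting when the same (region, date) pair appears in more than one archived file. The pattern in isolation:

from collections import Counter, defaultdict

totals = Counter()
added_totals = defaultdict(set)

def add_daily(region, date, new_cases):
    # Accumulate only the first value seen for each (region, date)
    if new_cases and date not in added_totals[region]:
        added_totals[region].add(date)
        totals[region] += new_cases
    return totals[region]

assert add_daily('FR-75', '2020_07_01', 10) == 10
assert add_daily('FR-75', '2020_07_01', 10) == 10  # duplicate day ignored
assert add_daily('FR-75', '2020_07_02', 5) == 15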
Example #24
    def _get_regions_data(self):
        out = DataPointMerger()
        base_dir = self.get_path_in_dir('')

        for date in self.iter_nonempty_dirs(base_dir):
            r = self.sdpf()
            path = f'{base_dir}/{date}/regions_data.json'
            print(path)

            with open(path, 'r', encoding='utf-8') as f:
                data = f.read()
                if 'Error performing query operation' in data:
                    continue

                data = data.replace(
                    'dojo_request_script_callbacks.dojo_request_script57(',
                    '').rstrip().rstrip(');')
                data = json.loads(data)

            for feature in data['features']:
                attributes = feature['attributes']
                #print(attributes)

                # Only confirmed and deaths are shown in the dashboard
                date = datetime.datetime.fromtimestamp(
                    attributes['ATNUJINTA'] / 1000).strftime('%Y_%m_%d')
                region_parent = 'LT'  # county_map[attributes['APSKRITIS']]  # NOTE: could map APSKRITIS to admin_1, but probably not worth splitting for now
                region_child = attributes['SAV_PAV'].replace(' r.',
                                                             '').replace(
                                                                 ' m.', '')
                confirmed = attributes['ATVEJAI']
                deaths = attributes['MIRTYS_KITA']
                positive = attributes['VYRAI']  # NOTE: duplicates the 'VYRAI' (men) field; possibly not the intended source
                recovered = attributes['PASVEIKO']
                women = attributes['MOTERYS']
                men = attributes['VYRAI']
                treated = attributes['GYDOMA']  # currently unused
                unknown = attributes['MIRE']  # currently unused

                if confirmed is not None:
                    r.append(region_schema=Schemas.LT_MUNICIPALITY,
                             region_parent=region_parent,
                             region_child=region_child,
                             datatype=DataTypes.TOTAL,
                             value=int(confirmed),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

                if positive is not None:
                    r.append(region_schema=Schemas.LT_MUNICIPALITY,
                             region_parent=region_parent,
                             region_child=region_child,
                             datatype=DataTypes.CONFIRMED,
                             value=int(positive),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

                if women is not None:
                    r.append(region_schema=Schemas.LT_MUNICIPALITY,
                             region_parent=region_parent,
                             region_child=region_child,
                             datatype=DataTypes.TOTAL_FEMALE,
                             value=int(women),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

                if men is not None:
                    r.append(region_schema=Schemas.LT_MUNICIPALITY,
                             region_parent=region_parent,
                             region_child=region_child,
                             datatype=DataTypes.TOTAL_MALE,
                             value=int(men),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

                if deaths is not None:
                    r.append(region_schema=Schemas.LT_MUNICIPALITY,
                             region_parent=region_parent,
                             region_child=region_child,
                             datatype=DataTypes.STATUS_DEATHS,
                             value=int(deaths),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

                if recovered is not None:
                    r.append(region_schema=Schemas.LT_MUNICIPALITY,
                             region_parent=region_parent,
                             region_child=region_child,
                             datatype=DataTypes.STATUS_RECOVERED,
                             value=int(recovered),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

                if recovered is not None and confirmed is not None and deaths is not None:
                    # confirmed - recovered - deaths is the active count, so
                    # this should be STATUS_ACTIVE, not a second STATUS_RECOVERED
                    r.append(region_schema=Schemas.LT_MUNICIPALITY,
                             region_parent=region_parent,
                             region_child=region_child,
                             datatype=DataTypes.STATUS_ACTIVE,
                             value=int(confirmed) - int(recovered) -
                             int(deaths),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

            out.extend(r)
        return out
Example #25
    def __get_totals_datapoints(self, SOURCE_URL, path_totals):
        # Pretty sure this is duplicate data (for now).
        # Don't re-enable this without making sure it won't double the result!
        r = DataPointMerger()

        with open(path_totals, 'r', encoding='utf-8') as f:
            for item in json.loads(f.read())['data']:
                if 'active_cases' in item:
                    try:
                        item['postcode'] = str(int(float(item['postcode'])))
                    except ValueError:
                        pass

                    # New format (2022)
                    date = item['date'].replace('-', '_')
                    total = int(item['total_cases'])
                    active = int(item['active_cases'])
                    postcode = item['postcode'] if item['postcode'] else 'Unknown'

                    r.append(DataPoint(
                        region_schema=Schemas.POSTCODE,
                        region_parent='AU-NSW',
                        region_child=postcode,
                        datatype=DataTypes.TOTAL,
                        value=total,
                        date_updated=date,
                        source_url=SOURCE_URL,
                        source_id=self.SOURCE_ID
                    ))
                    r.append(DataPoint(
                        region_schema=Schemas.POSTCODE,
                        region_parent='AU-NSW',
                        region_child=postcode,
                        datatype=DataTypes.STATUS_ACTIVE,
                        value=active,
                        date_updated=date,
                        source_url=SOURCE_URL,
                        source_id=self.SOURCE_ID
                    ))
                else:
                    try:
                        item['POA_NAME16'] = str(int(float(item['POA_NAME16'])))
                    except ValueError:
                        pass

                    date = self.__get_partial_date(path_totals, item['Date'])
                    number = int(item['Number'])
                    postcode = item['POA_NAME16'] if item['POA_NAME16'] else 'Unknown'

                    r.append(DataPoint(
                        region_schema=Schemas.POSTCODE,
                        region_parent='AU-NSW',
                        region_child=postcode,
                        datatype=DataTypes.TOTAL,
                        value=number,
                        date_updated=date,
                        source_url=SOURCE_URL,
                        source_id=self.SOURCE_ID
                    ))

        return r
Example #26
    def __get_active_deaths_datapoints(self, SOURCE_URL, path_active_deaths, active_data):
        r = DataPointMerger()

        with open(path_active_deaths, 'r', encoding='utf-8') as f:
            for item in json.loads(f.read())['data']:
                try:
                    item['POA_NAME16'] = str(int(float(item['POA_NAME16'])))
                except ValueError:
                    pass

                date = self.__get_partial_date(path_active_deaths, item['Date'])
                print(item)
                recovered = int(item.get('Recovered', 0))
                deaths = int(item['Deaths'])
                cases = int(item['Cases'])
                # NOTE: from 12 June, this heatmap's reporting of active cases
                # changed; cases not recorded as recovered or deceased after
                # six weeks are no longer included.
                censored = int(item.get('censored', 0))
                postcode = item['POA_NAME16'] if item['POA_NAME16'] else 'Unknown'

                r.append(DataPoint(
                    region_schema=Schemas.POSTCODE,
                    region_parent='AU-NSW',
                    region_child=postcode,
                    datatype=DataTypes.TOTAL,
                    value=cases,
                    date_updated=date,
                    source_url=SOURCE_URL,
                    source_id=self.SOURCE_ID
                ))

                if postcode in active_data:
                    num_active = active_data[postcode].pop()

                    r.append(DataPoint(
                        region_schema=Schemas.POSTCODE,
                        region_parent='AU-NSW',
                        region_child=postcode,
                        datatype=DataTypes.STATUS_ACTIVE,
                        value=num_active,  # CHECK ME
                        date_updated=date,
                        source_url=SOURCE_URL,
                        source_id=self.SOURCE_ID
                    ))

                    r.append(DataPoint(
                        region_schema=Schemas.POSTCODE,
                        region_parent='AU-NSW',
                        region_child=postcode,
                        datatype=DataTypes.STATUS_RECOVERED,
                        value=cases-num_active-deaths,  # CHECK ME
                        date_updated=date,
                        source_url=SOURCE_URL,
                        source_id=self.SOURCE_ID
                    ))
                elif date <= '2020_06_12':
                    active = cases - recovered - deaths - censored

                    r.append(DataPoint(
                        region_schema=Schemas.POSTCODE,
                        region_parent='AU-NSW',
                        region_child=postcode,
                        datatype=DataTypes.STATUS_ACTIVE,
                        value=active,
                        date_updated=date,
                        source_url=SOURCE_URL,
                        source_id=self.SOURCE_ID
                    ))

                #r.append(DataPoint(
                #    region_schema=Schemas.POSTCODE,
                #    region_parent='AU-NSW',
                #    region_child=postcode,
                #    datatype=DataTypes.STATUS_RECOVERED,
                #    value=recovered,
                #    date_updated=date,
                #    source_url=SOURCE_URL
                #))
                r.append(DataPoint(
                    region_schema=Schemas.POSTCODE,
                    region_parent='AU-NSW',
                    region_child=postcode,
                    datatype=DataTypes.STATUS_DEATHS,
                    value=deaths,
                    date_updated=date,
                    source_url=SOURCE_URL,
                    source_id=self.SOURCE_ID
                ))

        return r
Example #27
    def _get_by_datos_isciii(self):
        out = DataPointMerger()

        # Fecha,cod_ine,CCAA,Casos,PCR+,TestAc+,Hospitalizados,UCI,Fallecidos,Recuperados
        # 2020-02-20,01,Andalucía,0,0,,,,,
        # 2020-02-20,02,Aragón,,0,,,,,
        # 2020-02-20,03,Asturias,,0,,,,,
        # 2020-02-20,04,Baleares,,1,,,,,
        # 2020-02-20,05,Canarias,,1,,,,,
        # 2020-02-20,06,Cantabria,,0,,,,,
        # 2020-02-20,08,Castilla La Mancha,,0,,,,,
        # 2020-02-20,07,Castilla y León,,0,,,,,

        with open(
                self.get_path_in_dir('COVID 19/ccaa_covid19_datos_isciii.csv'),
                'r',
                encoding='utf-8') as f:
            r = self.sdpf()

            for item in csv.DictReader(f):
                #print(item)
                date = self.convert_date(item['Fecha'])
                ac_code = region_map[item['CCAA']]

                if item['Casos']:
                    r.append(region_schema=Schemas.ADMIN_1,
                             region_parent='ES',
                             region_child=ac_code,
                             datatype=DataTypes.TOTAL,
                             value=int(item['Casos']),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

                if item['PCR+'] or item['TestAc+']:
                    # NOTE: this combines PCR-positive and antibody (TestAc+) results
                    r.append(region_schema=Schemas.ADMIN_1,
                             region_parent='ES',
                             region_child=ac_code,
                             datatype=DataTypes.TESTS_TOTAL,
                             value=int(item['PCR+'] or 0) +
                             int(item['TestAc+'] or 0),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

                if item['Hospitalizados']:
                    r.append(region_schema=Schemas.ADMIN_1,
                             region_parent='ES',
                             region_child=ac_code,
                             datatype=DataTypes.STATUS_HOSPITALIZED,
                             value=int(item['Hospitalizados']),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

                if item['Fallecidos']:
                    r.append(region_schema=Schemas.ADMIN_1,
                             region_parent='ES',
                             region_child=ac_code,
                             datatype=DataTypes.STATUS_DEATHS,
                             value=int(item['Fallecidos']),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

                if item.get('Recuperados'):
                    r.append(region_schema=Schemas.ADMIN_1,
                             region_parent='ES',
                             region_child=ac_code,
                             datatype=DataTypes.STATUS_RECOVERED,
                             value=int(item['Recuperados']),
                             date_updated=date,
                             source_url=self.SOURCE_URL)

            out.extend(r)
        return out
Example #28
    def _get_municipality_data(self):
        out = DataPointMerger()
        base_dir = self.get_path_in_dir('')

        for date in self.iter_nonempty_dirs(base_dir):
            r = self.sdpf()
            path = f'{base_dir}/{date}/municipality_data.json'
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    data = json.loads(f.read())
            except UnicodeDecodeError:
                import brotli
                with open(path, 'rb') as f:
                    data = json.loads(
                        brotli.decompress(f.read()).decode('utf-8'))

            if 'features' not in data and date <= '2020_12_21':
                continue

            for feature in data['features']:
                attributes = feature['attributes']

                if date >= '2020_12_22':
                    #print(attributes)

                    # formula is incidence = (cases / population) * 100000
                    # incidence / 100000 = cases / population
                    # incidence / 100000 * population = cases
                    print(
                        attributes['Incidência'] / 100000 *
                        attributes['Total'], attributes['Incidência'] / 100000,
                        attributes['Incidência'], attributes['Total'])
                    confirmed = round(
                        attributes['Incidência'] / 100000 *
                        attributes['Total']) if attributes['Incidência'] else 0

                    if confirmed is not None:
                        r.append(
                            region_schema=Schemas.PT_MUNICIPALITY,
                            region_parent='PT',  # 'Distrito' -> district??
                            region_child=attributes['Concelho'],
                            datatype=DataTypes.TOTAL,
                            value=confirmed,
                            date_updated=date,
                            source_url=self.SOURCE_URL)
                else:
                    if attributes['Data_Conc'] is None:
                        continue

                    # Only confirmed and deaths are shown in the dashboard
                    date = datetime.datetime.fromtimestamp(
                        attributes['Data_Conc'] / 1000.0).strftime('%Y_%m_%d')
                    confirmed = attributes['ConfirmadosAcumulado_Conc']

                    if confirmed is not None:
                        r.append(
                            region_schema=Schemas.PT_MUNICIPALITY,
                            region_parent='PT',  # 'Distrito' -> district??
                            region_child=attributes['Concelho'],
                            datatype=DataTypes.TOTAL,
                            value=int(confirmed),
                            date_updated=date,
                            source_url=self.SOURCE_URL)

            out.extend(r)
        return out
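The back-calculation in the post-2020_12_22 branch deserves a standalone check: the dashboard publishes only incidence per 100,000 plus a population figure, so case counts are recovered as incidence / 100000 * population, rounded. Using the East Macedonia sample figures from Example #6 as a sanity check:

def cases_from_incidence(incidence_per_100k, population):
    # incidence = cases / population * 100000  =>  cases = incidence / 100000 * population
    return round(incidence_per_100k / 100000 * population)

# 11.88 cases per 100k across a population of 606,170 ~= 72 cases
assert cases_from_incidence(11.88, 606170) == 72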
Example #29
    def get_nsw_age_data(self, dir_, date, download=True):
        r = DataPointMerger()

        path_fatalitiesdata = dir_ / 'fatalitiesdata.json'
        path_agedata = dir_ / 'agedata.json'
        path_listing = dir_ / 'find-facts-about-covid-19.html'

        if not exists(path_fatalitiesdata) or not exists(path_agedata) or not exists(path_listing):
            if not download:
                return []

            urlretrieve(
                'https://nswdac-covid-19-postcode-heatmap.azurewebsites.net/datafiles/fatalitiesdata.json',
                path_fatalitiesdata
            )
            urlretrieve(
                'https://nswdac-covid-19-postcode-heatmap.azurewebsites.net/datafiles/agedata.json',
                path_agedata
            )
            urlretrieve(
                'https://www.nsw.gov.au/covid-19/find-facts-about-covid-19',
                path_listing
            )

        if False:
            # This could be unreliable: it's served by an external web service
            # (even though it appears on the same page) and probably isn't
            # updated at the same time as the other elements on the page.

            with open(path_listing, 'r', encoding='utf-8') as f:
                html = f.read()
                try:
                    _date = html.split('Last updated')[1].strip().partition(' ')[-1].split('.')[0].strip()
                    date = datetime.strptime(_date, '%d %B %Y').strftime('%Y_%m_%d')
                except IndexError:
                    # It seems this info isn't always supplied(?)
                    import traceback
                    traceback.print_exc()

        with open(path_agedata, 'r', encoding='utf-8') as f:
            # {"data":[{"ageGroup":"0-9","Males":null,"Females":null},
            agedata = json.loads(f.read())

            for age_dict in agedata['data']:
                r.append(DataPoint(
                    region_schema=Schemas.ADMIN_1,
                    region_parent='AU',
                    region_child='AU-NSW',
                    datatype=DataTypes.TOTAL_MALE,
                    value=age_dict['Males'] or 0,
                    agerange=age_dict['ageGroup'],
                    date_updated=date,
                    source_url='https://www.nsw.gov.au/covid-19/find-facts-about-covid-19',
                    text_match=None,
                    source_id=self.SOURCE_ID
                ))
                r.append(DataPoint(
                    region_schema=Schemas.ADMIN_1,
                    region_parent='AU',
                    region_child='AU-NSW',
                    datatype=DataTypes.TOTAL_FEMALE,
                    value=age_dict['Females'] or 0,
                    agerange=age_dict['ageGroup'],
                    date_updated=date,
                    source_url='https://www.nsw.gov.au/covid-19/find-facts-about-covid-19',
                    text_match=None,
                    source_id=self.SOURCE_ID
                ))
                r.append(DataPoint(
                    region_schema=Schemas.ADMIN_1,
                    region_parent='AU',
                    region_child='AU-NSW',
                    datatype=DataTypes.TOTAL,
                    value=(age_dict['Females'] or 0) + (age_dict['Males'] or 0),
                    agerange=age_dict['ageGroup'],
                    date_updated=date,
                    source_url='https://www.nsw.gov.au/covid-19/find-facts-about-covid-19',
                    text_match=None,
                    source_id=self.SOURCE_ID
                ))

        """
        with open(path_fatalitiesdata, 'r', encoding='utf-8') as f:
            agedata = json.loads(f.read())

            for age_dict in agedata['data']:
                r.append(DataPoint(
                    region_schema=Schemas.ADMIN_1,
                    region_parent='AU',
                    region_child='AU-NSW',
                    datatype=DataTypes.STATUS_DEATHS_MALE,
                    value=age_dict['Males'] or 0,
                    agerange=age_dict['ageGroup'],
                    date_updated=date,
                    source_url='https://www.nsw.gov.au/covid-19/find-facts-about-covid-19',
                    text_match=None,
                    source_id=self.SOURCE_ID_WEBSITE_DATA
                ))
                r.append(DataPoint(
                    region_schema=Schemas.ADMIN_1,
                    region_parent='AU',
                    region_child='AU-NSW',
                    datatype=DataTypes.STATUS_DEATHS_FEMALE,
                    value=age_dict['Females'] or 0,
                    agerange=age_dict['ageGroup'],
                    date_updated=date,
                    source_url='https://www.nsw.gov.au/covid-19/find-facts-about-covid-19',
                    text_match=None,
                    source_id=self.SOURCE_ID_WEBSITE_DATA
                ))
        """

        return r
Example #30
    def get_datapoints(self):
        r = DataPointMerger()
        ua = URLArchiver(f'{self.STATE_NAME}/current_statistics')
        ua.get_url_data(self.STATS_BY_REGION_URL_2,
                        cache=not ALWAYS_DOWNLOAD_LISTING)

        for period in ua.iter_periods():
            for subperiod_id, subdir in ua.iter_paths_for_period(period):
                path = ua.get_path(subdir)

                with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                    html = f.read()

                cbr = self._get_total_cases_by_region(
                    self.STATS_BY_REGION_URL_2, html)
                if cbr: r.extend(cbr)

                total = self._get_total_cases(self.STATS_BY_REGION_URL_2, html)
                if total: r.append(total)

                new = self._get_total_new_cases(self.STATS_BY_REGION_URL_2,
                                                html)
                if new: r.append(new)

                tested = self._get_total_cases_tested(
                    self.STATS_BY_REGION_URL_2, html)
                if tested: r.append(tested)

                age_breakdown = self._get_total_age_breakdown(
                    self.STATS_BY_REGION_URL_2, html)
                if age_breakdown: r.extend(age_breakdown)

                dhr = self._get_total_dhr(self.STATS_BY_REGION_URL_2, html)
                if dhr: r.extend(dhr)

                soi = self._get_total_source_of_infection(
                    self.STATS_BY_REGION_URL_2, html)
                if soi: r.extend(soi)

        r.extend(StateNewsBase.get_datapoints(self))
        return r