def scrape_bolivia():
    cwd = getcwd()
    bolivia_dir = path.join(cwd, 'data', 'bolivia')
    tmp_dir = path.join(cwd, 'tmp')
    ensure_dirs(bolivia_dir, tmp_dir)

    data = requests.get(URL).json()

    for key, iso in REGION_ISO.items():
        region_data = defaultdict(dict)
        for entry in data['confirmados']:
            region_data[entry['fecha']]['cases'] = entry['dep'][key]
        for entry in data['decesos']:
            region_data[entry['fecha']]['deaths'] = entry['dep'][key]
        for entry in data['recuperados']:
            region_data[entry['fecha']]['recovered'] = entry['dep'][key]

        for date in region_data.keys():
            region_data[date]['date'] = date
            region_data[date]['region_iso'] = iso
            region_data[date]['region'] = ISO_REGION[iso]
            region_data[date]['province'] = ''
            region_data[date]['city'] = ''
            region_data[date]['place_type'] = 'departamento'

        df = pd.DataFrame(region_data.values(), columns=[
            'date', 'region_iso', 'region', 'province', 'city', 'place_type',
            'cases', 'deaths', 'recovered'
        ])
        region_file = path.join(bolivia_dir, f'{iso.lower()}.csv')
        df.to_csv(region_file, index=False, float_format='%.f')

    with open(path.join(bolivia_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents())
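# `ensure_dirs` is shared by these scrapers but not defined in this file; a
# minimal sketch, assuming it only needs to create any missing directories:
from os import makedirs

def ensure_dirs(*dirs):
    for d in dirs:
        makedirs(d, exist_ok=True)  # no-op when the directory already exists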
def scrape_countries():
    cwd = getcwd()
    countries_dir = path.join(cwd, 'data', 'countries')
    ensure_dirs(countries_dir)

    countries = {}
    df = pd.read_csv(COUNTRIES_DATA, parse_dates=[0], dayfirst=True)
    for country in df['location'].unique():
        is_country = df['location'] == country
        country_filename = country.lower().replace(' ', '_') + '.csv'
        country_file = path.join(countries_dir, country_filename)
        countries[country] = country_filename
        # Rename on a fresh copy rather than in place on a slice of `df`,
        # which would trigger pandas' SettingWithCopyWarning.
        country_df = df[is_country].rename(columns={
            'date': 'dateRep',
            'new_cases': 'cases',
            'new_deaths': 'deaths',
            'location': 'countriesAndTerritories',
            'iso_code': 'countryterritoryCode',
            'population': 'popData2019',
            'continent': 'continentExp',
        })
        country_df.to_csv(country_file, index=False, float_format='%.f')

    with open(path.join(countries_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents(countries))
def scrape_uruguay():
    cwd = getcwd()
    uruguay_dir = path.join(cwd, 'data', 'uruguay')
    tmp_dir = path.join(cwd, 'tmp')
    ensure_dirs(uruguay_dir, tmp_dir)

    not_number_regexp = re.compile(r'\D')
    today = str(datetime.date.today())

    page = requests.get(URL)
    soup = BeautifulSoup(page.content, 'html.parser')
    tables = soup.find_all('table', {'class': 'wikitable'})
    per_departament_table = None
    for table in tables:
        headers = [th.get_text().strip() for th in table.find_all('th')]
        if len(headers) > 0 and headers[0] == 'Departamento':
            per_departament_table = table

    updated_files = []
    header = 'date,iso,region,city,place_type,cases,deaths,recovered\n'
    for tr in per_departament_table.tbody.find_all('tr'):
        cols = [td.get_text().strip() for td in tr.find_all('td')]
        if len(cols) != 5:
            continue
        departament = cols[0]
        iso = DEPARTAMENT_ISO[departament]
        # Source table order: cases (col 1), recovered (col 2), deaths (col 3).
        line = ','.join([
            today,
            iso,
            departament,
            '',
            'departamento',
            not_number_regexp.sub('', cols[1]),
            not_number_regexp.sub('', cols[3]),
            not_number_regexp.sub('', cols[2]),
        ])
        departament_file = path.join(uruguay_dir, f'{iso.lower()}.csv')
        is_new = not path.exists(departament_file)
        with open(departament_file, 'a+') as f:
            if is_new:
                f.write(header)
            f.write(f'{line}\n')
        if not is_new:
            updated_files.append(departament_file)

    ensure_consistency(updated_files, lambda row: row[:4])

    with open(path.join(uruguay_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents())
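# `ensure_consistency` is also defined elsewhere. Each call site passes the
# files that just received a new row plus a key function (here the first
# four CSV fields), which suggests it de-duplicates rows from repeated
# scrapes. A sketch under that assumption, keeping the last row written for
# each key:
import csv

def ensure_consistency(updated_files, key_fn):
    for file_path in updated_files:
        with open(file_path, newline='') as f:
            rows = list(csv.reader(f))
        header, body = rows[0], rows[1:]
        deduped = {tuple(key_fn(row)): row for row in body}  # last row wins
        with open(file_path, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(header)
            writer.writerows(deduped.values())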
def scrape_ecuador():
    cwd = getcwd()
    ecuador_dir = path.join(cwd, 'data', 'ecuador')
    tmp_dir = path.join(cwd, 'tmp')
    ensure_dirs(ecuador_dir, tmp_dir)

    not_number_regexp = re.compile(r'\D')
    today = str(datetime.date.today())

    page = requests.get(URL)
    soup = BeautifulSoup(page.content, 'html.parser')
    tables = soup.find_all('table', {'class': 'sortable'})
    per_province_table = None
    for table in tables:
        headers = [th.get_text().strip() for th in table.find_all('th')]
        if len(headers) > 0 and headers[0] == 'Provincias':
            per_province_table = table

    updated_files = []
    header = 'date,iso,province,city,place_type,cases,deaths\n'
    for tr in per_province_table.tbody.find_all('tr'):
        cols = [td.get_text().strip() for td in tr.find_all('td')]
        if len(cols) != 3:
            continue
        province = cols[0]
        iso = PROVINCE_ISO[province]
        line = ','.join([
            today,
            iso,
            province,
            '',
            'unknown' if iso == 'UNK' else 'province',
            not_number_regexp.sub('', cols[1]),
            not_number_regexp.sub('', cols[2]),
        ])
        province_file = path.join(ecuador_dir, f'{iso.lower()}.csv')
        is_new = not path.exists(province_file)
        with open(province_file, 'a+') as f:
            if is_new:
                f.write(header)
            f.write(f'{line}\n')
        if not is_new:
            updated_files.append(province_file)

    ensure_consistency(updated_files, lambda row: row[:4])

    with open(path.join(ecuador_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents())
def scrape_canada():
    cwd = getcwd()
    canada_dir = path.join(cwd, 'data', 'canada')
    ensure_dirs(canada_dir)

    df_cases = pd.read_csv(CASES_URL)
    df_deaths = pd.read_csv(DEATHS_URL)
    df_recovered = pd.read_csv(RECOVERED_URL)

    data = defaultdict(lambda: defaultdict(dict))
    for _, row in df_cases.iterrows():
        date = row['date_report']
        code = row['province']
        data[code][date]['cases'] = row['cumulative_cases']
    for _, row in df_deaths.iterrows():
        date = row['date_death_report']
        code = row['province']
        data[code][date]['deaths'] = row['cumulative_deaths']
    for _, row in df_recovered.iterrows():
        date = row['date_recovered']
        code = row['province']
        data[code][date]['recovered'] = row['cumulative_recovered']

    for code, region_data in data.items():
        if code not in CODE_REGION:
            continue
        region = CODE_REGION[code]
        iso = REGION_ISO[region]
        place_type = 'province'
        if iso in TERRITORIES:
            place_type = 'territory'
        for date in region_data.keys():
            region_data[date]['date'] = date
            region_data[date]['iso'] = iso
            region_data[date]['province'] = region
            region_data[date]['city'] = ''
            region_data[date]['place_type'] = place_type
        df = pd.DataFrame(region_data.values(), columns=[
            'date', 'iso', 'province', 'city', 'place_type',
            'cases', 'deaths', 'recovered'])
        df['date'] = pd.to_datetime(df['date'], dayfirst=True)
        region_file = path.join(canada_dir, f'{iso.lower()}.csv')
        df.to_csv(region_file, index=False, float_format='%.f')

    with open(path.join(canada_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents())
def scrape_by_counties():
    cwd = getcwd()
    sweden_dir = path.join(cwd, 'data', 'sweden')
    tmp_dir = path.join(cwd, 'tmp')
    ensure_dirs(sweden_dir, tmp_dir)

    today = str(datetime.date.today())
    r = requests.get(DATA_PER_COUNTY)
    data = r.json()

    updated_county_files = []
    header = ('date,county,county_iso,city,place_type,cases,deaths,'
              'estimated_population_2019,area_km2,'
              'confirmed_per_100k_inhabitants,critical\n')
    for feat in data['features']:
        attributes = feat['attributes']
        county = attributes['Region']
        iso = COUNTY_ISO_MAPPED[county].lower()
        confirmed = attributes['Totalt_antal_fall']
        deaths = attributes['Totalt_antal_avlidna']
        confirmed_per_100k = attributes['Fall_per_100000_inv']
        critical = attributes['Totalt_antal_intensivvårdade']
        line = ','.join([
            today,
            county,
            iso.upper(),
            '',
            'county',
            str(confirmed),
            str(deaths),
            str(COUNTY_POPULATION_MAPPED[county]),
            str(COUNTY_AREA_MAPPED[county]),
            str(confirmed_per_100k),
            str(critical) if critical is not None else '',
        ])
        county_file = path.join(sweden_dir, f'{iso}.csv')
        is_new = not path.exists(county_file)
        with open(county_file, 'a+') as f:
            if is_new:
                f.write(header)
            f.write(f'{line}\n')
        if not is_new:
            updated_county_files.append(county_file)

    ensure_consistency(updated_county_files, lambda a: a[:5])
def scrape_united_states_of_america():
    cwd = getcwd()
    us_dir = path.join(cwd, 'data', 'united_states_of_america')
    tmp_dir = path.join(cwd, 'tmp')
    ensure_dirs(us_dir, tmp_dir)

    headers = [
        'date', 'state', 'county', 'place_type', 'fips', 'cases', 'deaths'
    ]

    counties_df = pd.read_csv(COUNTIES_DATASET)
    counties_df = counties_df.sort_values(by=['state', 'county', 'date'],
                                          ascending=[True, True, False])
    counties_df['place_type'] = 'county'
    counties_df = counties_df[headers]

    states_df = pd.read_csv(STATES_DATASET)
    states_df = states_df.sort_values(by=['state', 'date'],
                                      ascending=[True, False])
    states_df['county'] = ''
    states_df['place_type'] = 'state'
    states_df = states_df[headers]

    states_fips = {}
    fipses = states_df['fips'].unique()
    for fips in fipses:
        is_current_fips = states_df['fips'] == fips
        fips_file = path.join(us_dir, f'{fips:02d}.csv')
        current_df = states_df[is_current_fips]
        current_df.to_csv(fips_file, index=False, float_format='%.f')
        state = current_df['state'].iloc[0]
        is_same_fips = counties_df['state'] == state
        current_counties_df = counties_df[is_same_fips]
        current_counties_df.to_csv(fips_file, index=False, header=False,
                                   mode='a', float_format='%.f')
        states_fips[f'{fips:02d}'] = state

    with open(path.join(us_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents(states_fips))
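# A note on the f'{fips:02d}' padding above: state FIPS codes are two
# digits, so 6 (California) must serialize as '06'. The format spec assumes
# an integer; if pandas parses the fips column as float (which happens when
# the column contains NaNs), ':02d' raises ValueError, so a defensive cast
# is a cheap guard. `format_fips` is an illustrative helper, not part of
# the scraper:
def format_fips(fips):
    return f'{int(fips):02d}'  # format_fips(6.0) == '06'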
def scrape_argentina():
    cwd = getcwd()
    argentina_dir = path.join(cwd, 'data', 'argentina')
    tmp_dir = path.join(cwd, 'tmp')
    ensure_dirs(argentina_dir, tmp_dir)

    page = requests.get(URL).json()

    updated_files = []
    header = 'date,region_iso,region,province,city,place_type,cases,deaths,recovered\n'
    for dep in page:
        if dep['provincia-key'] == 'totales':
            continue
        region = CODE_REGION[dep['provincia-key']]
        day = datetime.datetime.strptime(dep['ultima-actualizacion'],
                                         '%d/%m/%Y').strftime('%Y-%m-%d')
        iso = REGION_ISO[region]
        confirmed = get(dep, 'Afectados', '0')
        deaths = get(dep, 'Muertos', '0')
        recovered = get(dep, 'Recuperados', '0')
        line = ','.join([
            day,
            iso,
            region,
            '',
            '',
            'unknown' if iso == 'UNK' else 'provincia',
            str(confirmed),
            str(deaths),
            str(recovered)
        ])
        region_file = path.join(argentina_dir, f'{iso.lower()}.csv')
        is_new = not path.exists(region_file)
        with open(region_file, 'a+') as f:
            if is_new:
                f.write(header)
            f.write(f'{line}\n')
        if not is_new:
            updated_files.append(region_file)

    ensure_consistency(updated_files, lambda row: row[:5])

    with open(path.join(argentina_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents())
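# `get` above is not the built-in dict.get: the call sites pass string
# defaults for numeric fields, which hints that it also guards against
# empty or null values in the feed. A hedged sketch of that reading:
def get(d, key, default):
    value = d.get(key, default)
    return default if value in (None, '') else value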
def scrape_spain():
    cwd = getcwd()
    spain_dir = path.join(cwd, 'data', 'spain')
    tmp_dir = path.join(cwd, 'tmp')
    ensure_dirs(spain_dir, tmp_dir)

    headers = ['date', 'region', 'city', 'place_type', 'iso',
               'cases', 'deaths', 'hospitalized', 'critical']

    # error_bad_lines=False skips malformed rows (renamed to
    # on_bad_lines='skip' in pandas >= 1.3).
    df = pd.read_csv(COUNTIES_DATASET,
                     parse_dates=[1],
                     dayfirst=True,
                     encoding='iso-8859-1',
                     error_bad_lines=False)
    df = df.rename(columns={
        'CCAA': 'iso',
        'FECHA': 'date',
        'CASOS': 'cases',
        'Hospitalizados': 'hospitalized',
        'UCI': 'critical',
        'Fallecidos': 'deaths',
    })
    # Keep only rows whose CCAA value is a two-letter code, dropping any
    # aggregate or note rows with longer labels.
    df = df[df['iso'].str.len() == 2]

    def fill_cases(row):
        # When the aggregate case count is missing, fall back to the sum of
        # PCR-confirmed and antibody-confirmed cases.
        cases = row['cases']
        if np.isnan(cases):
            return row['PCR+'] + row['TestAc+']
        return cases

    df['cases'] = df.apply(fill_cases, axis=1)
    df = df.sort_values(by=['iso', 'date'], ascending=[True, False])
    df['region'] = df.apply(lambda r: CCAA_ISO[r['iso']], axis=1)
    df['city'] = ''
    df['place_type'] = 'autonomous_community'
    df = df[headers]

    for iso in df['iso'].unique():
        is_current_iso = df['iso'] == iso
        region_file = path.join(spain_dir, f'es-{iso.lower()}.csv')
        current_df = df[is_current_iso]
        current_df.to_csv(region_file, index=False, float_format='%.f')

    with open(path.join(spain_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents())
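# The row-wise apply above works, but the same backfill can be written as
# one vectorized assignment; a behavior-equivalent sketch, assuming 'PCR+'
# and 'TestAc+' are numeric columns (`fill_cases_vectorized` is
# illustrative, not part of the scraper):
def fill_cases_vectorized(df):
    # NaN entries in 'cases' take the PCR + antibody sum from the same row.
    return df['cases'].fillna(df['PCR+'] + df['TestAc+'])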
def scrape_countries():
    cwd = getcwd()
    countries_dir = path.join(cwd, 'data', 'countries')
    ensure_dirs(countries_dir)

    countries = {}
    df = pd.read_csv(COUNTRIES_DATA, parse_dates=[0], dayfirst=True)
    for country in df['countriesAndTerritories'].unique():
        is_country = df['countriesAndTerritories'] == country
        country_filename = country.lower().replace(' ', '_') + '.csv'
        country_file = path.join(countries_dir, country_filename)
        countries[country] = country_filename
        country_df = df[is_country]
        country_df.to_csv(country_file, index=False, float_format='%.f')

    with open(path.join(countries_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents(countries))
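# `get_readme_contents` comes from each scraper's own readme module. The
# countries scrapers pass a name-to-filename mapping, so presumably it
# renders an index of the generated CSVs; a purely illustrative sketch
# (the real template lives elsewhere in the repository):
def get_readme_contents(countries):
    lines = ['# Countries', '', '| Country | File |', '| --- | --- |']
    for country, filename in sorted(countries.items()):
        lines.append(f'| {country} | [{filename}]({filename}) |')
    return '\n'.join(lines) + '\n'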
def scrape_brazil():
    cwd = getcwd()
    brazil_dir = path.join(cwd, 'data', 'brazil')
    tmp_dir = path.join(cwd, 'tmp')
    ensure_dirs(brazil_dir, tmp_dir)

    gz_filename = path.join(tmp_dir, 'brazil.csv.gz')
    with open(gz_filename, 'wb') as gz_file:
        r = requests.get(BRAZIL_DATA, allow_redirects=True)
        gz_file.write(r.content)

    states = {}
    prev_state = ''
    header = ''
    curr_lines = []

    def write_file():
        with open(path.join(brazil_dir, f'{prev_state}.csv'), 'w') as state_file:
            state_file.writelines([header] + curr_lines)

    # Stream the decompressed CSV and cut a new per-state file whenever the
    # state column changes; this assumes the source rows are grouped by state.
    with gzip.open(gz_filename, 'rt') as f:
        for line in f:
            if header == '':
                header = line
                continue
            state = line.split(',')[1].lower()
            if len(prev_state) > 0 and state != prev_state:
                write_file()
                curr_lines = []
            curr_lines.append(line)
            prev_state = state
            states[state] = True
    write_file()

    with open(path.join(brazil_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents())
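# The single-pass split in scrape_brazil is memory-cheap but depends on the
# rows being grouped by state; if a state reappeared later in the file, its
# earlier CSV would be silently overwritten. An order-independent sketch
# that buffers lines per state before writing, at the cost of holding the
# whole file in memory (`split_by_state` is illustrative, not part of the
# scraper):
from collections import defaultdict

def split_by_state(gz_filename, out_dir):
    buffers = defaultdict(list)
    with gzip.open(gz_filename, 'rt') as f:
        header = next(f)
        for line in f:
            buffers[line.split(',')[1].lower()].append(line)
    for state, lines in buffers.items():
        with open(path.join(out_dir, f'{state}.csv'), 'w') as state_file:
            state_file.writelines([header] + lines)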
def scrape_australia():
    cwd = getcwd()
    australia_dir = path.join(cwd, 'data', 'australia')
    ensure_dirs(australia_dir)

    df_cases = pd.read_csv(CASES_URL)
    df_deaths = pd.read_csv(DEATHS_URL)
    df_recovered = pd.read_csv(RECOVERED_URL)

    data = defaultdict(lambda: defaultdict(dict))
    for _, row in df_cases.iterrows():
        date = row['Date']
        for iso in ISO_REGION.keys():
            data[iso][date]['cases'] = row[iso]
    for _, row in df_deaths.iterrows():
        date = row['Date']
        for iso in ISO_REGION.keys():
            data[iso][date]['deaths'] = row[iso]
    for _, row in df_recovered.iterrows():
        date = row['Date']
        for iso in ISO_REGION.keys():
            data[iso][date]['recovered'] = row[iso]

    for iso, region_data in data.items():
        place_type = 'state'
        if iso in ['ACT', 'NT']:
            place_type = 'territory'
        for date in region_data.keys():
            region_data[date]['date'] = date
            region_data[date]['iso'] = iso
            region_data[date]['state'] = ISO_REGION[iso]
            region_data[date]['city'] = ''
            region_data[date]['place_type'] = place_type
        df = pd.DataFrame(region_data.values(),
                          columns=['date', 'iso', 'state', 'city',
                                   'place_type', 'cases', 'deaths',
                                   'recovered'])
        region_file = path.join(australia_dir, f'{iso.lower()}.csv')
        df.to_csv(region_file, index=False, float_format='%.f')

    with open(path.join(australia_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents())
def scrape_peru():
    cwd = getcwd()
    peru_dir = path.join(cwd, 'data', 'peru')
    tmp_dir = path.join(cwd, 'tmp')
    ensure_dirs(peru_dir, tmp_dir)

    not_number_regexp = re.compile(r'\D')
    today = str(datetime.date.today())

    page = requests.get(URL)
    soup = BeautifulSoup(page.content, 'html.parser')
    tables = soup.find_all('table', {'class': 'sortable'})
    per_departament_table = None
    for table in tables:
        headers = [th.get_text().strip() for th in table.find_all('th')]
        if len(headers) > 0 and 'Departamento' in headers[0]:
            per_departament_table = table

    updated_files = []
    header = 'date,iso,region,city,place_type,cases,deaths\n'
    mapped = {}
    for tr in per_departament_table.tbody.find_all('tr'):
        headers = [th.get_text().strip() for th in tr.find_all('th')]
        cols = [td.get_text().strip() for td in tr.find_all('td')]
        if len(cols) != 10:
            continue
        departament = headers[0]
        cases = int(not_number_regexp.sub('', cols[1]))
        deaths = int(not_number_regexp.sub('', cols[3]))
        if 'Lima' in departament:
            # The source lists two Lima rows (e.g. Lima and Lima
            # Metropolitana). Buffer the first and merge its counts into the
            # second, so a single combined 'Lima' row is written.
            departament = 'Lima'
            if 'Lima' in mapped:
                _cases, _deaths = mapped['Lima']
                cases += _cases
                deaths += _deaths
            else:
                mapped['Lima'] = (cases, deaths)
                continue
        iso = DEPARTAMENT_ISO[departament]
        line = ','.join([
            today,
            iso,
            departament,
            '',
            'departamento',
            str(cases),
            str(deaths),
        ])
        departament_file = path.join(peru_dir, f'{iso.lower()}.csv')
        is_new = not path.exists(departament_file)
        with open(departament_file, 'a+') as f:
            if is_new:
                f.write(header)
            f.write(f'{line}\n')
        if not is_new:
            updated_files.append(departament_file)

    ensure_consistency(updated_files, lambda row: row[:4])

    with open(path.join(peru_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents())
def scrape_united_kingdom():
    cwd = getcwd()
    tmp_dir = path.join(cwd, 'tmp', 'united_kingdom')
    uk_dir = path.join(cwd, 'data', 'united_kingdom')
    ensure_dirs(uk_dir, tmp_dir)

    england_cases_by_area_url = requests.get(ENGLAND_CASES_BY_AREA).url
    deaths_by_area_csv = requests.get(DEATHS_BY_AREA, verify=False).text

    headers = [
        'date', 'country', 'region', 'place_type', 'geo_code', 'cases',
        'deaths'
    ]

    deaths_df = pd.read_csv(io.StringIO(deaths_by_area_csv), parse_dates=[3])
    deaths_df = deaths_df.set_index(['areaName', 'areaType', 'date'])

    df = pd.read_csv(UK_CASES_BY_AREA, parse_dates=[0], dayfirst=True)
    df = df.rename(columns={
        'GSS_CD': 'geo_code',
        'type': 'place_type',
        'confirm': 'cases',
        'area': 'region',
    })
    df = df.fillna(value={'place_type': 'unknown'})
    df['place_type'] = df.apply(lambda r: get_place_type(r['place_type']),
                                axis=1)
    df = df.sort_values(by=['country', 'region', 'date'],
                        ascending=[True, True, False])

    def fill_deaths(row):
        # The deaths dataset labels countries as 'nation', so translate the
        # place type before looking up the (area, type, date) index.
        area_type = row['place_type']
        if area_type == 'country':
            area_type = 'nation'
        key = (row['region'], area_type, row['date'])
        if key not in deaths_df.index:
            return np.NaN
        return deaths_df.loc[key]['cumDeaths28DaysByDeathDate']

    df['deaths'] = df.apply(fill_deaths, axis=1)
    df = df[headers]

    countries = {}
    for country in df['country'].unique():
        is_country_data = df['region'] == country
        is_not_country_data = df['region'] != country
        is_current_country = df['country'] == country
        country_filename = country.lower().replace(' ', '_') + '.csv'
        country_file = path.join(uk_dir, country_filename)
        countries[country] = country_filename
        # Write the country-level rows first, then append the regional
        # breakdown to the same file.
        country_df = df[is_country_data]
        country_df.to_csv(country_file, index=False, float_format='%.f')
        regions_df = df[is_current_country & is_not_country_data]
        regions_df.to_csv(country_file, index=False, header=False,
                          float_format='%.f', mode='a')

    england_df = pd.read_csv(england_cases_by_area_url, parse_dates=[3])
    england_df = england_df.rename(columns={
        'Area name': 'region',
        'Area code': 'geo_code',
        'Cumulative lab-confirmed cases': 'cases',
        'Area type': 'place_type',
        'Specimen date': 'date'
    })
    england_df = england_df.fillna(value={'place_type': 'unknown'})
    england_df['country'] = 'England'
    england_df['place_type'] = england_df.apply(
        lambda r: get_place_type(r['place_type']), axis=1)
    england_df['deaths'] = england_df.apply(fill_deaths, axis=1)
    england_df = england_df.sort_values(by=['country', 'region', 'date'],
                                        ascending=[True, True, False])
    england_df = england_df[headers]

    england_filename = 'england.csv'
    england_file = path.join(uk_dir, england_filename)
    countries['England'] = england_filename
    england_df[england_df['place_type'] == 'country'].to_csv(
        england_file, index=False, float_format='%.f')
    england_df[england_df['place_type'] != 'country'].to_csv(
        england_file, index=False, float_format='%.f', header=False,
        mode='a')

    with open(path.join(uk_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents(countries))
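# `get_place_type` normalizes the sources' area labels into this dataset's
# vocabulary (fill_deaths above implies it maps 'nation' to 'country'). It
# is defined elsewhere, so the mapping below is an illustrative guess at
# its shape rather than the actual table:
def get_place_type(raw_type):
    mapping = {
        'nation': 'country',
        'region': 'region',
        'utla': 'utla',  # upper-tier local authority
        'ltla': 'ltla',  # lower-tier local authority
    }
    return mapping.get(raw_type.lower(), raw_type)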
def scrape_chile():
    cwd = getcwd()
    chile_dir = path.join(cwd, 'data', 'chile')
    tmp_dir = path.join(cwd, 'tmp')
    ensure_dirs(chile_dir, tmp_dir)

    today = str(datetime.date.today())
    page = requests.get(URL)
    soup = BeautifulSoup(page.content, 'html.parser')
    not_number_regexp = re.compile(r'\D')

    per_region_table = None
    tables = soup.find_all('table')
    for table in tables:
        headers = table.find_all('th')
        if len(headers) > 0 and 'Regiones' in headers[0].get_text():
            per_region_table = table
            break

    updated_files = []
    header = 'date,region,region_iso,province,city,place_type,cases,deaths\n'
    # Slice off the leading header rows and the trailing totals row.
    for tr in per_region_table.find_all('tr')[2:-1]:
        cols = [td.get_text() for td in tr.find_all('td')]
        if len(cols) != 6:
            continue
        iso = None
        for region in REGION_ISO:
            if region in cols[0]:
                iso = REGION_ISO[region]
                break
        if iso is None:
            continue
        region = ISO_REGION[iso]
        line = ','.join([
            today,
            region,
            iso,
            '',
            '',
            'region',
            not_number_regexp.sub('', cols[2]),
            not_number_regexp.sub('', cols[4]),
        ])
        region_file = path.join(chile_dir, f'{iso.lower()}.csv')
        is_new = not path.exists(region_file)
        with open(region_file, 'a+') as f:
            if is_new:
                f.write(header)
            f.write(f'{line}\n')
        if not is_new:
            updated_files.append(region_file)

    ensure_consistency(updated_files, lambda row: row[:5])

    with open(path.join(chile_dir, 'README.md'), 'w') as readme_f:
        readme_f.write(get_readme_contents())
def scrape_additional():
    cwd = getcwd()
    ensure_dirs(path.join(cwd, 'data', 'sweden', 'additional'))
    scrape_cases_by_age()
    scrape_deaths_by_age()