Example #1
def main(save, use_saved, **ignore):
    """Generate dataset and create it in HDX"""

    with Download(extra_params_yaml=join(expanduser('~'), '.extraparams.yml'),
                  extra_params_lookup=lookup) as token_downloader:
        configuration = Configuration.read()
        with Download() as downloader:
            folder = temp_dir(lookup)
            retriever = Retrieve(downloader, folder, 'saved_data', folder,
                                 save, use_saved)
            wfp = WFPFood(configuration, token_downloader, retriever)
            countries = wfp.get_countries()
            logger.info('Number of country datasets to upload: %d' %
                        len(countries))
            wfp.build_mappings()
            for info, country in progress_storing_tempdir(
                    lookup, countries, 'iso3'):

                dataset, showcase, qc_indicators = wfp.generate_dataset_and_showcase(
                    country['iso3'], info['folder'])
                if dataset:
                    dataset.update_from_yaml()
                    dataset['notes'] = dataset['notes'] % \
                        'Food Prices data for %s. Food prices data comes from the World Food Programme and covers' % \
                        country['name']
                    dataset.generate_resource_view(-1,
                                                   indicators=qc_indicators)
                    dataset.create_in_hdx(
                        remove_additional_resources=True,
                        hxl_update=False,
                        updated_by_script='HDX Scraper: WFP Food Prices',
                        batch=info['batch'])
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
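
The chained %-substitution above is easy to misread: % is left-associative, so the notes template is filled with the sentence first (the sentence still contains its own %s), and the country name is substituted last. A minimal illustration with a hypothetical template:

# Hypothetical notes template; the real one comes from the dataset's YAML config.
notes = '%s staple food items.'
sentence = ('Food Prices data for %s. Food prices data comes from the '
            'World Food Programme and covers')
# Left-associative: (notes % sentence) % 'Kenya'
print(notes % sentence % 'Kenya')
# Food Prices data for Kenya. Food prices data comes from the World Food
# Programme and covers staple food items.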
Example #2
 def test_setup_stream(self, fixtureurl, fixturenotexistsurl, getfixtureurl,
                       postfixtureurl):
     with pytest.raises(DownloadError), Download() as downloader:
         downloader.setup('NOTEXIST://NOTEXIST.csv')
     with pytest.raises(DownloadError), Download() as downloader:
         downloader.setup(fixturenotexistsurl)
     with Download() as downloader:
         downloader.setup(fixtureurl)
         headers = downloader.response.headers
         assert headers['Content-Length'] == '728'
     with Download() as downloader:
         downloader.setup(postfixtureurl, post=True)
         headers = downloader.response.headers
         assert bool(re.match(r'3[56]\d',
                              headers['Content-Length'])) is True
         downloader.setup('%s?id=10&lala=a' % getfixtureurl,
                          post=False,
                          parameters=OrderedDict([('b', '4'), ('d', '3')]))
         assert downloader.get_json()['args'] == OrderedDict([('b', '4'),
                                                              ('d', '3'),
                                                              ('id', '10'),
                                                              ('lala', 'a')
                                                              ])
         downloader.setup('%s?id=3&lala=b' % postfixtureurl,
                          post=True,
                          parameters=OrderedDict([('a', '3'), ('c', '2')]))
         assert downloader.get_json()['form'] == OrderedDict([('a', '3'),
                                                              ('c', '2'),
                                                              ('id', '3'),
                                                              ('lala', 'b')
                                                              ])
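Example #3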
 def test_setup_stream(self, fixtureurl, fixturenotexistsurl, getfixtureurl,
                       postfixtureurl):
     with pytest.raises(DownloadError), Download() as downloader:
         downloader.setup("NOTEXIST://NOTEXIST.csv")
     with pytest.raises(DownloadError), Download() as downloader:
         downloader.setup(fixturenotexistsurl)
     with Download() as downloader:
         downloader.setup(fixtureurl)
         headers = downloader.response.headers
         assert bool(re.match(r"7\d\d", headers["Content-Length"])) is True
     with Download() as downloader:
         downloader.setup(postfixtureurl, post=True)
         headers = downloader.response.headers
         assert bool(re.match(r"4\d\d", headers["Content-Length"])) is True
         downloader.setup(
             f"{getfixtureurl}?id=10&lala=a",
             post=False,
             parameters=OrderedDict([("b", "4"), ("d", "3")]),
         )
         assert list(downloader.get_json()["args"].items()) == list(
             OrderedDict([("b", "4"), ("d", "3"), ("id", "10"),
                          ("lala", "a")]).items())
         downloader.setup(
             f"{postfixtureurl}?id=3&lala=b",
             post=True,
             parameters=OrderedDict([("a", "3"), ("c", "2")]),
         )
         assert list(downloader.get_json()["form"].items()) == list(
             OrderedDict([("a", "3"), ("c", "2"), ("id", "3"),
                          ("lala", "b")]).items())
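
Distilled from the two tests above, a minimal sketch of the setup() calling pattern (the httpbin-style echo URL is an illustrative assumption, not one of the fixtures):

from collections import OrderedDict

with Download() as downloader:
    # GET is the default; post=True switches the request to POST
    downloader.setup('https://httpbin.org/get?id=10')
    size = downloader.response.headers['Content-Length']
    # Values already in the query string are merged with the parameters dict
    downloader.setup('https://httpbin.org/get?id=10',
                     parameters=OrderedDict([('b', '4')]))
    merged = downloader.get_json()['args']  # {'b': '4', 'id': '10'}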
Example #4
 def test_download(self, fixtureurl, fixturenotexistsurl, getfixtureurl,
                   postfixtureurl):
     with pytest.raises(DownloadError), Download() as downloader:
         downloader.download('NOTEXIST://NOTEXIST.csv')
     with pytest.raises(DownloadError), Download() as downloader:
         downloader.download(fixturenotexistsurl)
     with Download() as downloader:
         result = downloader.download(fixtureurl)
         assert result.headers['Content-Length'] == '728'
         downloader.download('%s?id=10&lala=a' % getfixtureurl,
                             post=False,
                             parameters=OrderedDict([('b', '4'),
                                                     ('d', '3')]))
         assert downloader.get_json()['args'] == OrderedDict([('b', '4'),
                                                              ('d', '3'),
                                                              ('id', '10'),
                                                              ('lala', 'a')
                                                              ])
         downloader.download('%s?id=3&lala=b' % postfixtureurl,
                             post=True,
                             parameters=OrderedDict([('a', '3'),
                                                     ('c', '2')]))
         assert downloader.get_json()['form'] == OrderedDict([('a', '3'),
                                                              ('c', '2'),
                                                              ('id', '3'),
                                                              ('lala', 'b')
                                                              ])
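Example #5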
 def test_download(self, fixtureurl, fixturenotexistsurl):
     with pytest.raises(DownloadError), Download() as download:
         download.download('NOTEXIST://NOTEXIST.csv')
     with pytest.raises(DownloadError), Download() as download:
         download.download(fixturenotexistsurl)
     with Download() as download:
         result = download.download(fixtureurl)
         assert result.headers['Content-Length'] == '479'
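Example #6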
 def test_setup_stream(self, fixtureurl, fixturenotexistsurl):
     with pytest.raises(DownloadError), Download() as download:
         download.setup_stream('NOTEXIST://NOTEXIST.csv')
     with pytest.raises(DownloadError), Download() as download:
         download.setup_stream(fixturenotexistsurl)
     with Download() as download:
         download.setup_stream(fixtureurl)
         headers = download.response.headers
         assert headers['Content-Length'] == '479'
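Example #7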
 def test_download_file(self, fixtureurl, fixturenotexistsurl):
     tmpdir = tempfile.gettempdir()
     with pytest.raises(DownloadError), Download() as download:
         download.download_file('NOTEXIST://NOTEXIST.csv', tmpdir)
     with pytest.raises(DownloadError), Download() as download:
         download.download_file(fixturenotexistsurl)
     with Download() as download:
         f = download.download_file(fixtureurl, tmpdir)
         fpath = abspath(f)
         unlink(f)
         assert fpath == abspath(join(tmpdir, 'test_data.csv'))
Example #8
 def test_init(self, downloaderfolder):
     basicauthfile = join(downloaderfolder, 'basicauth.txt')
     with Download(basicauthfile=basicauthfile) as download:
         assert download.session.auth == ('testuser', 'testpass')
     with pytest.raises(DownloadError):
         Download(auth=('u', 'p'), basicauth='Basic xxxxxxxxxxxxxxxx')
     with pytest.raises(DownloadError):
         Download(auth=('u', 'p'), basicauthfile=join('lala', 'lala.txt'))
     with pytest.raises(DownloadError):
         Download(basicauth='Basic xxxxxxxxxxxxxxxx',
                  basicauthfile=join('lala', 'lala.txt'))
     with pytest.raises(IOError):
         Download(basicauthfile='NOTEXIST')
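
For the first assertion to hold, the basicauth.txt fixture presumably contains a basic-auth header value whose base64 payload decodes to testuser:testpass, along these lines:

# Assumed content of downloaderfolder/basicauth.txt
# ('dGVzdHVzZXI6dGVzdHBhc3M=' is base64 for 'testuser:testpass'):
# Basic dGVzdHVzZXI6dGVzdHBhc3M=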
Example #9
 def test_download_file(self, tmpdir, fixtureurl, fixturenotexistsurl,
                        getfixtureurl, postfixtureurl):
     tmpdir = str(tmpdir)
     with pytest.raises(DownloadError), Download() as downloader:
         downloader.download_file('NOTEXIST://NOTEXIST.csv', tmpdir)
     with pytest.raises(DownloadError), Download() as downloader:
         downloader.download_file(fixturenotexistsurl)
     with Download() as downloader:
         f = downloader.download_file(fixtureurl, folder=tmpdir)
         fpath = abspath(f)
         remove(f)
         assert fpath == abspath(join(tmpdir, 'test_data.csv'))
         filename = 'myfilename.txt'
         f = downloader.download_file(fixtureurl,
                                      folder=tmpdir,
                                      filename=filename)
         fpath = abspath(f)
         remove(f)
         assert fpath == abspath(join(tmpdir, filename))
         f = downloader.download_file('%s?id=10&lala=a' % getfixtureurl,
                                      post=False,
                                      parameters=OrderedDict([('b', '4'),
                                                              ('d', '3')]),
                                      folder=tmpdir,
                                      filename=filename)
         fpath = abspath(f)
         with open(fpath, 'rt') as fi:
             text = fi.read()
             assert '"id": "10"' in text
             assert '"lala": "a"' in text
             assert '"b": "4"' in text
             assert '"d": "3"' in text
         remove(f)
         assert fpath == abspath(join(tmpdir, filename))
         f = downloader.download_file('%s?id=3&lala=b' % postfixtureurl,
                                      post=True,
                                      parameters=OrderedDict([('a', '3'),
                                                              ('c', '2')]),
                                      folder=tmpdir,
                                      filename=filename)
         fpath = abspath(f)
         with open(fpath, 'rt') as fi:
             text = fi.read()
             assert '"id": "3"' in text
             assert '"lala": "b"' in text
             assert '"a": "3"' in text
             assert '"c": "2"' in text
         remove(f)
         assert fpath == abspath(join(tmpdir, filename))
Example #10
def main():
    """Generate dataset and create it in HDX"""

    with Download() as downloader:
        configuration = Configuration.read()
        countries_path = join('config', configuration['countries_filename'])
        indicators_url = configuration['indicators_url']
        mvam_url = configuration['mvam_url']
        showcase_url = configuration['showcase_url']
        countries = get_countries(countries_path, downloader)
        variables = get_mvamvariables(indicators_url, downloader)
        logger.info('Number of datasets to upload: %d' % len(countries))
        for info, country in progress_storing_tempdir('WFPFoodSecurity',
                                                      countries, 'iso3'):
            dataset, showcase, bites_disabled = \
                generate_dataset_and_showcase(mvam_url, showcase_url, downloader, info['folder'],
                                              country, variables)
            if dataset:
                dataset.update_from_yaml()
                dataset.generate_resource_view(bites_disabled=bites_disabled)
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script='HDX Scraper: WFP Food Security',
                    batch=info['batch'])
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
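Example #11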
 def test_download_tabular_rows_as_dicts(self, fixtureprocessurl):
     with Download() as downloader:
         result = downloader.download_tabular_rows_as_dicts(
             fixtureprocessurl, headers=2)
         self.fix_strings(result)
         assert result == {
             "coal": {
                 "header2": "3",
                 "header3": "7.4",
                 "header4": "needed",
             },
             "gas": {
                 "header2": "2",
                 "header3": "6.5",
                 "header4": "n/a"
             },
         }
         result = downloader.download_tabular_rows_as_dicts(
             fixtureprocessurl, headers=2, keycolumn=2)
         self.fix_strings(result)
         assert result == {
             "2": {
                 "header1": "gas",
                 "header3": "6.5",
                 "header4": "n/a"
             },
             "3": {
                 "header1": "coal",
                 "header3": "7.4",
                 "header4": "needed",
             },
         }
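
The fixture behind these assertions can be read off Example #26 below, where get_tabular_rows_as_list returns its raw rows; it is equivalent to a CSV like:

# la1,ha1,ba1,ma1
# header1,header2,header3,header4
# coal,3,7.4,needed
# gas,2,6.5,n/a
#
# headers=2 makes row 2 the header row; keycolumn picks which column supplies
# the outer dict keys (the first column when not specified).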
Example #12
    def data(self, configuration):
        resources = configuration["resources"]
        download_url = (Path(__file__).resolve().parent / "fixtures").as_uri()

        print(download_url)
        return get_countriesdata(download_url, resources,
                                 Download(user_agent="test"))
Example #13
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    download_url = configuration["download_url"]
    with Download() as downloader:
        countries, headers, countriesdata = get_countriesdata(download_url, downloader)
        logger.info(f"Number of countries: {len(countriesdata)}")
        for info, country in progress_storing_tempdir("UCDP", countries, "iso3"):
            folder = info["folder"]
            dataset, showcase = generate_dataset_and_showcase(
                folder, country, countriesdata[country["iso3"]], headers
            )
            if dataset:
                dataset.update_from_yaml()
                dataset["notes"] = dataset["notes"].replace(
                    "\n", "  \n"
                )  # ensure markdown has line breaks
                dataset.generate_resource_view(1)
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script="HDX Scraper: UCDP",
                    batch=info["batch"],
                )
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
Example #14
def main():
    """Generate dataset and create it in HDX"""

    filelist_url = Configuration.read()['filelist_url']
    country_group_url = Configuration.read()['country_group_url']
    dataset_codes = Configuration.read()['dataset_codes']
    showcase_base_url = Configuration.read()['showcase_base_url']
    with temp_dir('faostat') as folder:
        with Download() as downloader:
            indicatortypes = get_indicatortypesdata(filelist_url, downloader)
            countriesdata = get_countriesdata(country_group_url, downloader)
            logger.info('Number of indicator types to upload: %d' %
                        len(dataset_codes))
            for dataset_code in dataset_codes:
                datasets, showcases = generate_datasets_and_showcases(
                    downloader, folder, dataset_codes[dataset_code],
                    indicatortypes[dataset_code], countriesdata,
                    showcase_base_url)
                logger.info('Number of datasets to upload: %d' % len(datasets))
                for i, dataset in enumerate(datasets):
                    logger.info('Creating dataset: %s' % dataset['title'])
                    dataset.preview_off()
                    dataset.create_in_hdx()
                    showcase = showcases[i]
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
Example #15
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    base_url = configuration['base_url']
    with Download(extra_params_yaml=join(expanduser('~'), '.extraparams.yml'),
                  extra_params_lookup=lookup) as downloader:
        downloader.session.mount(
            'http://',
            HTTPAdapter(max_retries=1, pool_connections=100, pool_maxsize=100))
        downloader.session.mount(
            'https://',
            HTTPAdapter(max_retries=1, pool_connections=100, pool_maxsize=100))
        countries = get_countries(base_url, downloader)
        logger.info('Number of countries: %d' % len(countries))

        for folder, country in progress_storing_tempdir(
                'DHS', countries, 'iso3'):
            tags = get_tags(base_url, downloader, country['dhscode'])
            dataset, subdataset, showcase, bites_disabled = \
                generate_datasets_and_showcase(configuration, base_url, downloader, folder, country, tags)
            if dataset:
                createdataset(dataset)
                resource_view = generate_resource_view(
                    dataset, bites_disabled=bites_disabled['national'])
                resource_view.create_in_hdx()
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
            if subdataset:
                createdataset(subdataset)
                showcase.add_dataset(subdataset)
                subdataset.generate_resource_view(
                    bites_disabled=bites_disabled['subnational'])
Example #16
def main():
    """Generate dataset and create it in HDX"""

    with Download() as downloader:
        config = Configuration.read()
        project_config = {
            key: value
            for key, value in config.items() if key.startswith("CV")
        }
        qc_indicators = config.get("qc_indicators", {})
        countries, countriesdata, headers = get_all_countriesdata(
            project_config, downloader)

        logger.info("Number of datasets to upload: %d" % len(countries))
        for info, country in progress_storing_tempdir("UNICEFSAM", countries,
                                                      "iso3"):
            dataset, showcase, bites_disabled = generate_dataset_and_showcase(
                info["folder"], country, countriesdata[country["iso3"]],
                headers, project_config, qc_indicators)
            if dataset:
                dataset.update_from_yaml()
                dataset.generate_resource_view(1,
                                               bites_disabled=bites_disabled,
                                               indicators=qc_indicators)
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script="HDX Scraper: UNICEF Sam",
                    batch=info["batch"],
                )
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
Example #17
def main(
    output_dir,
    saved_dir,
    save,
    use_saved,
    dportal_params,
    whattorun,
    filterdate,
    **ignore,
):
    logger.info(f"##### hdx-scraper-iati-viz version {VERSION:.1f} ####")
    configuration = Configuration.read()
    output_dir = f"{output_dir}_{whattorun}"
    rmtree(output_dir, ignore_errors=True)
    mkdir(output_dir)
    with Download() as downloader:
        retriever = Retrieve(
            downloader,
            configuration["fallback_dir"],
            f"{saved_dir}_{whattorun}",
            output_dir,
            save,
            use_saved,
        )
        today = datetime.utcnow().isoformat()
        start(
            configuration,
            today,
            retriever,
            output_dir,
            dportal_params,
            whattorun,
            filterdate,
        )
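
Retrieve is constructed with the same positional pattern throughout these examples; a hedged reading of the arguments, inferred from the call sites here and in Examples #1, #20 and #24 (the glosses are not the library's own parameter names):

# Retrieve(downloader,    - Download object used for live requests
#          fallback_dir,  - static files used if a download fails
#          saved_dir,     - folder responses are saved to / replayed from
#          temp_dir,      - working folder for downloaded files
#          save,          - True: record live responses into saved_dir
#          use_saved)     - True: replay from saved_dir instead of downloading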
Example #18
def main(excel_path, gsheet_auth, updatesheets, updatetabs, scrapers,
         basic_auths, nojson, **ignore):
    logger.info('##### hdx-scraper-covid-viz version %.1f ####' % VERSION)
    configuration = Configuration.read()
    with Download(rate_limit={'calls': 1, 'period': 0.1}) as downloader:
        if scrapers:
            logger.info('Updating only scrapers: %s' % scrapers)
        tabs = configuration['tabs']
        if updatetabs is None:
            updatetabs = list(tabs.keys())
            logger.info('Updating all tabs')
        else:
            logger.info('Updating only these tabs: %s' % updatetabs)
        noout = nooutput(updatetabs)
        if excel_path:
            excelout = exceloutput(excel_path, tabs, updatetabs)
        else:
            excelout = noout
        if gsheet_auth:
            gsheets = googlesheets(configuration, gsheet_auth, updatesheets,
                                   tabs, updatetabs)
        else:
            gsheets = noout
        if nojson:
            jsonout = noout
        else:
            jsonout = jsonoutput(configuration, updatetabs)
        outputs = {'gsheets': gsheets, 'excel': excelout, 'json': jsonout}
        admininfo = AdminInfo.setup(downloader)
        get_indicators(configuration, downloader, admininfo, outputs,
                       updatetabs, scrapers, basic_auths)
        excelout.save()
        jsonout.add_additional_json(downloader)
        jsonout.save(hrp_iso3s=admininfo.hrp_iso3s)
Example #19
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    hdro_url = configuration['hdro_url']
    qc_indicators = configuration['qc_indicators']
    with Download() as downloader:
        countriesdata = get_countriesdata(hdro_url, downloader)
        countries = [{
            'iso3': countryiso
        } for countryiso in sorted(countriesdata.keys())]
        logger.info('Number of countries to upload: %d' % len(countries))
        for info, country in progress_storing_tempdir('HDRO', countries,
                                                      'iso3'):
            countryiso = country['iso3']
            countrydata = countriesdata[countryiso]
            dataset, showcase, bites_disabled = generate_dataset_and_showcase(
                info['folder'], countryiso, countrydata, qc_indicators)
            if dataset:
                dataset.update_from_yaml()
                dataset.generate_resource_view(-1,
                                               bites_disabled=bites_disabled,
                                               indicators=qc_indicators)
                dataset.create_in_hdx(remove_additional_resources=True,
                                      hxl_update=False,
                                      updated_by_script='HDX Scraper: HDRO',
                                      batch=info['batch'])
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
Example #20
 def test_run(self, configuration, fixtures_dir):
     with temp_dir("TestIATIViz",
                   delete_on_success=True,
                   delete_on_failure=False) as tempdir:
         with Download(user_agent="test") as downloader:
             retriever = Retrieve(
                 downloader,
                 tempdir,
                 fixtures_dir,
                 tempdir,
                 save=False,
                 use_saved=True,
             )
             today = "2021-05-06"
             start(
                 configuration,
                 today,
                 retriever,
                 tempdir,
                 dportal_params=None,
                 whattorun="covid",
                 filterdate="2020-01",
             )
             for filename in ("flows", "transactions", "reporting_orgs"):
                 csv_filename = f"{filename}.csv"
                 expected_file = join(fixtures_dir, csv_filename)
                 actual_file = join(tempdir, csv_filename)
                 assert_files_same(expected_file, actual_file)
                 json_filename = f"{filename}.json"
                 expected_file = join(fixtures_dir, json_filename)
                 actual_file = join(tempdir, json_filename)
                 assert filecmp.cmp(expected_file, actual_file)
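Example #21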
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    resources = configuration["resources"]
    fields = configuration["fields"]
    source_directory = configuration["source_directory"]
    download_url = Path(source_directory).resolve().as_uri()

    with Download() as downloader:
        countries, headers, countriesdata = get_countriesdata(
            download_url, resources, downloader)
        logger.info("Number of countries: %d" % len(countriesdata))
        for info, country in progress_storing_tempdir("UNHCR_population",
                                                      countries, "iso3"):
            folder = info["folder"]

            dataset, showcase = generate_dataset_and_showcase(
                folder, country, countriesdata[country["iso3"]], headers,
                resources, fields)
            if dataset:
                dataset.update_from_yaml()
                dataset["notes"] = dataset["notes"].replace(
                    "\n", "  \n")  # ensure markdown has line breaks
                dataset.generate_resource_view(1)
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script="HDX Scraper: UNHCR population",
                    batch=info["batch"],
                )
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
Example #22
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    indicators = configuration["indicators"]
    json_url = configuration["json_url"]
    with Download() as downloader:
        indicators_metadata = get_indicators_metadata(json_url, downloader,
                                                      indicators)
        countriesdata, countries = get_countriesdata(json_url, downloader,
                                                     indicators)
        logger.info(f"Number of countries to upload: {len(countries)}")

        for info, country in progress_storing_tempdir("WorldPop", countries,
                                                      "iso3"):
            countryiso = country["iso3"]
            datasets, showcases = generate_datasets_and_showcases(
                downloader, countryiso, indicators_metadata,
                countriesdata[countryiso])
            for dataset in datasets:
                dataset.update_from_yaml()
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script="HDX Scraper: WorldPop",
                    batch=info["batch"],
                )
                for showcase in showcases.get(dataset["name"], list()):
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
Example #23
def get_soup(
    url: str,
    downloader: Optional[Download] = None,
    user_agent: Optional[str] = None,
    user_agent_config_yaml: Optional[str] = None,
    user_agent_lookup: Optional[str] = None,
    **kwargs: Any,
) -> BeautifulSoup:
    """
    Get a BeautifulSoup object for a url. Requires either a global user agent to be set or the
    appropriate user agent parameter(s) to be supplied.

    Args:
        url (str): url to read
        downloader (Download): Download object. Defaults to creating a Download object with given user agent values.
        user_agent (Optional[str]): User agent string. HDXPythonUtilities/X.X.X- is prefixed.
        user_agent_config_yaml (Optional[str]): Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yml.
        user_agent_lookup (Optional[str]): Lookup key for YAML. Ignored if user_agent supplied.

    Returns:
        BeautifulSoup: The BeautifulSoup object for a url

    """
    if not downloader:
        downloader = Download(user_agent, user_agent_config_yaml,
                              user_agent_lookup, **kwargs)
    response = downloader.download(url)
    return BeautifulSoup(response.text, "html.parser")
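
A minimal usage sketch (the URL and user agent string are illustrative assumptions):

# Reuse one Download object across calls rather than creating one per request
with Download(user_agent='my-scraper') as downloader:
    soup = get_soup('https://example.com/page.html', downloader=downloader)
    for link in soup.find_all('a'):
        print(link.get('href'))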
Example #24
def main(excel_path, gsheet_auth, updatesheets, updatetabs, scrapers, basic_auths, other_auths, nojson,
         countries_override, save, use_saved, **ignore):
    logger.info('##### hdx-scraper-covid-viz version %.1f ####' % VERSION)
    configuration = Configuration.read()
    with temp_dir() as temp_folder:
        with Download(rate_limit={'calls': 1, 'period': 0.1}) as downloader:
            retriever = Retrieve(downloader, temp_folder, 'saved_data', temp_folder, save, use_saved)
            if scrapers:
                logger.info('Updating only scrapers: %s' % scrapers)
            tabs = configuration['tabs']
            if updatetabs is None:
                updatetabs = list(tabs.keys())
                logger.info('Updating all tabs')
            else:
                logger.info('Updating only these tabs: %s' % updatetabs)
            noout = NoOutput(updatetabs)
            if excel_path:
                excelout = ExcelOutput(excel_path, tabs, updatetabs)
            else:
                excelout = noout
            if gsheet_auth:
                gsheets = GoogleSheets(configuration, gsheet_auth, updatesheets, tabs, updatetabs)
            else:
                gsheets = noout
            if nojson:
                jsonout = noout
            else:
                jsonout = JsonOutput(configuration, updatetabs)
            outputs = {'gsheets': gsheets, 'excel': excelout, 'json': jsonout}
            today = datetime.now()
            countries_to_save = get_indicators(configuration, today, retriever, outputs, updatetabs, scrapers,
                                               basic_auths, other_auths, countries_override)
            jsonout.add_additional_json(downloader, today=today)
            jsonout.save(countries_to_save=countries_to_save)
            excelout.save()
Example #25
 def test_download_tabular_rows_as_dicts(self, fixtureprocessurl):
     with Download() as downloader:
         result = downloader.download_tabular_rows_as_dicts(
             fixtureprocessurl, headers=2)
         self.fix_strings(result)
         assert result == {
             'coal': {
                 'header2': '3',
                 'header3': '7.4',
                 'header4': 'needed'
             },
             'gas': {
                 'header2': '2',
                 'header3': '6.5',
                 'header4': 'n/a'
             }
         }
         result = downloader.download_tabular_rows_as_dicts(
             fixtureprocessurl, headers=2, keycolumn=2)
         self.fix_strings(result)
         assert result == {
             '2': {
                 'header1': 'gas',
                 'header3': '6.5',
                 'header4': 'n/a'
             },
             '3': {
                 'header1': 'coal',
                 'header3': '7.4',
                 'header4': 'needed'
             }
         }
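Example #26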
 def test_get_tabular_rows_as_list(self, fixtureprocessurl):
     with Download() as downloader:
         rows = list(downloader.get_tabular_rows_as_list(fixtureprocessurl))
         assert rows == [
             ["la1", "ha1", "ba1", "ma1"],
             ["header1", "header2", "header3", "header4"],
             ["coal", "3", "7.4", "needed"],
             ["gas", "2", "6.5", "n/a"],
         ]
Example #27
 def test_get_indicators(self, configuration, folder):
     with temp_dir('TestCovidViz') as tempdir:
         with Download(user_agent='test') as downloader:
             tabs = configuration['tabs']
             noout = nooutput(tabs)
             jsonout = jsonoutput(configuration, tabs)
             outputs = {'gsheets': noout, 'excel': noout, 'json': jsonout}
             get_indicators(configuration, downloader, outputs, tabs, scrapers=['ifi', 'who', 'covid_trend'])
             filepath = jsonout.save(tempdir)
             assert_files_same(filepath, join(folder, 'test_tabular.json'))
Example #28
 def test_download_tabular_key_value(self, fixtureurl, fixtureprocessurl):
     with Download() as downloader:
         result = downloader.download_tabular_key_value(fixtureurl,
                                                        file_type='csv')
         assert result == {'615': '2231RTA', 'GWNO': 'EVENT_ID_CNTY'}
         result = downloader.download_tabular_key_value(fixtureprocessurl,
                                                        headers=2)
         assert result == {'coal': '3', 'gas': '2'}
         with pytest.raises(DownloadError):
             downloader.download_tabular_key_value(
                 'NOTEXIST://NOTEXIST.csv')
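
A note on what the first assertion implies: every row maps its first column to its second, header row included, which is why the header pair GWNO/EVENT_ID_CNTY appears alongside the data pair:

# Implied behaviour (inferred from the assertions, not from library docs):
# a row 'GWNO,EVENT_ID_CNTY,...' contributes {'GWNO': 'EVENT_ID_CNTY'}.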
Example #29
def main():
    """Generate dataset and create it in HDX"""

    with Download() as downloader:
        indicators = Configuration.read()['indicators']
        tags = Configuration.read()['tags']
        folder = get_temp_dir('IDMC')
        datasets, showcase, headersdata, countriesdata = generate_indicator_datasets_and_showcase(
            downloader, folder, indicators, tags)
        showcase_not_added = True
        countries = [{'iso3': x} for x in sorted(countriesdata)]

        logger.info('Number of indicator datasets to upload: %d' %
                    len(indicators))
        logger.info('Number of country datasets to upload: %d' %
                    len(countries))
        for i, info, nextdict in multiple_progress_storing_tempdir(
                'IDMC', [indicators, countries], ['name', 'iso3']):
            folder = info['folder']
            batch = info['batch']
            if i == 0:
                if showcase_not_added:
                    showcase.create_in_hdx()
                    showcase_not_added = False
                dataset = datasets[nextdict['name']]
                dataset.update_from_yaml()
                dataset.generate_resource_view(
                    join('config', nextdict['resourceview']))
                dataset.create_in_hdx(remove_additional_resources=True,
                                      hxl_update=False,
                                      updated_by_script='HDX Scraper: IDMC',
                                      batch=batch)
                showcase.add_dataset(dataset)
            else:
                countryiso = nextdict['iso3']
                countrydata = countriesdata[countryiso]
                dataset, showcase, bites_disabled = \
                    generate_country_dataset_and_showcase(
                        downloader, folder, headersdata, countryiso,
                        countrydata, datasets, tags)
                if dataset:
                    dataset.update_from_yaml()
                    dataset.generate_resource_view(
                        bites_disabled=bites_disabled)
                    dataset.create_in_hdx(
                        remove_additional_resources=True,
                        hxl_update=False,
                        updated_by_script='HDX Scraper: IDMC',
                        batch=batch)
                    resources = dataset.get_resources()
                    resource_ids = [
                        x['id'] for x in sorted(resources,
                                                key=lambda x: len(x['name']),
                                                reverse=True)
                    ]
                    dataset.reorder_resources(resource_ids, hxl_update=False)
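Example #30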
 def test_download_tabular_key_value(self, fixtureurl, fixtureprocessurl):
     with Download() as downloader:
         result = downloader.download_tabular_key_value(fixtureurl,
                                                        file_type="csv")
         assert result == {"615": "2231RTA", "GWNO": "EVENT_ID_CNTY"}
         result = downloader.download_tabular_key_value(fixtureprocessurl,
                                                        headers=2)
         assert result == {"coal": "3", "gas": "2"}
         with pytest.raises(DownloadError):
             downloader.download_tabular_key_value(
                 "NOTEXIST://NOTEXIST.csv")