예제 #1
0
 def test_exception(self, hdx_config_yaml, project_config_yaml):
     """Check which exceptions escape from facade.

     Expects ValueError when facade runs my_excfn with a user agent, and
     UserAgentError when facade runs my_testuafn with no user_agent argument.
     The global UserAgent is cleared before and after each call.
     """
     config_files = {'hdx_config_yaml': hdx_config_yaml,
                     'project_config_yaml': project_config_yaml}

     UserAgent.clear_global()
     testresult.actual_result = None
     with pytest.raises(ValueError):
         facade(my_excfn, user_agent='test', **config_files)

     UserAgent.clear_global()
     with pytest.raises(UserAgentError):
         facade(my_testuafn, **config_files)

     UserAgent.clear_global()
예제 #2
0
 def test_exception(self, hdx_config_yaml, project_config_yaml):
     """Verify the exceptions that facade is expected to raise.

     my_excfn run with a user agent must end in ValueError; my_testuafn run
     with no user_agent argument must end in UserAgentError.
     """
     def run(fn, **extra):
         # Every call in this test uses the same two configuration files.
         return facade(fn,
                       **extra,
                       hdx_config_yaml=hdx_config_yaml,
                       project_config_yaml=project_config_yaml)

     UserAgent.clear_global()
     testresult.actual_result = None
     with pytest.raises(ValueError):
         run(my_excfn, user_agent='test')

     UserAgent.clear_global()
     with pytest.raises(UserAgentError):
         run(my_testuafn)

     UserAgent.clear_global()
    )
    parser.add_argument("--old-url-pattern",
                        default="scraperwiki.com",
                        help="String present in old URLs")
    parser.add_argument("--new-url-pattern",
                        default="scraperwiki-snapshot/datasets",
                        help="String present in new URLs")
    parser.add_argument("-l", "--log", help="Log file", default=None)
    parser.add_argument("-d",
                        "--debug",
                        action='store_true',
                        help="Log debug messages.")
    parser.add_argument("-v",
                        "--verbose",
                        action='store_true',
                        help="Increase verbosity.")
    config = parser.parse_args()

    log_level = logging.WARNING
    if config.verbose:
        log_level = min(log_level, logging.INFO)
    if config.debug:
        log_level = min(log_level, logging.DEBUG)

    if config.log is not None:
        logging.basicConfig(filename=config.log, level=log_level)
    facade(main,
           hdx_site=config.hdx_site,
           user_agent_config_yaml=join(expanduser('~'),
                                       '.dscheckuseragent.yml'))
예제 #4
0
# -*- coding: utf-8 -*-
'''
Calls a function that generates a dataset and creates it in HDX.

'''
import logging

from hdx.facades.simple import facade
from .my_code import generate_dataset

logger = logging.getLogger(__name__)


def main():
    """Build a dataset with generate_dataset and create it in HDX."""
    generate_dataset().create_in_hdx()


# Script entry point: hand main to facade, selecting the 'test' HDX site.
if __name__ == '__main__':
    facade(
        main,
        hdx_site='test',
    )
        logger.info("Number of countries: %d" % len(countriesdata))
        for info, country in progress_storing_tempdir("UNHCR_population",
                                                      countries, "iso3"):
            folder = info["folder"]

            dataset, showcase = generate_dataset_and_showcase(
                folder, country, countriesdata[country["iso3"]], headers,
                resources, fields)
            if dataset:
                dataset.update_from_yaml()
                dataset["notes"] = dataset["notes"].replace(
                    "\n", "  \n")  # ensure markdown has line breaks
                dataset.generate_resource_view(1)
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script="HDX Scraper: UNHCR population",
                    batch=info["batch"],
                )
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)


# Script entry point: hand main to facade. The user agent settings are looked
# up (key: lookup) in ~/.useragents.yml; project settings come from config/.
if __name__ == "__main__":
    facade(main,
           user_agent_config_yaml=join(expanduser("~"), ".useragents.yml"),
           user_agent_lookup=lookup,
           project_config_yaml=join("config", "project_configuration.yml"))
예제 #6
0
 def test_facade(self, monkeypatch, hdx_config_yaml, project_config_yaml):
     """Run facade under a sequence of argument/environment combinations.

     Every step clears the global UserAgent and resets
     testresult.actual_result to None, so an assertion can only be satisfied
     by the facade call made in that same step. Environment variables set
     through monkeypatch are never removed inside the test, so each one
     remains set for all later steps; the order of the steps matters.
     """
     config_files = {'hdx_config_yaml': hdx_config_yaml,
                     'project_config_yaml': project_config_yaml}

     def reset():
         # Fresh state for the next step.
         UserAgent.clear_global()
         testresult.actual_result = None

     # Default site URL.
     reset()
     my_user_agent = 'test'
     facade(my_testfn, user_agent=my_user_agent, **config_files)
     assert testresult.actual_result == 'https://data.humdata.org/'

     # User agent passed as an argument.
     reset()
     version = get_api_version()
     facade(my_testuafn, user_agent=my_user_agent, **config_files)
     assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (version, my_user_agent)

     # User agent supplied only through the USER_AGENT environment variable.
     reset()
     my_user_agent = 'lala'
     monkeypatch.setenv('USER_AGENT', my_user_agent)
     facade(my_testuafn, **config_files)
     assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (version, my_user_agent)

     # Expected: the USER_AGENT environment value wins over the argument.
     reset()
     facade(my_testuafn, user_agent='test', **config_files)
     assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (version, my_user_agent)

     # Expected: PREPREFIX from the environment is prepended with a colon.
     reset()
     my_preprefix = 'haha'
     monkeypatch.setenv('PREPREFIX', my_preprefix)
     facade(my_testuafn, user_agent='test', **config_files)
     assert testresult.actual_result == '%s:HDXPythonLibrary/%s-%s' % (my_preprefix, version, my_user_agent)

     # API key passed as an argument.
     reset()
     my_test_key = '1234'
     facade(my_testkeyfn, hdx_key=my_test_key, user_agent=my_user_agent, **config_files)
     assert testresult.actual_result == my_test_key

     # Expected: HDX_KEY from the environment wins over the argument.
     reset()
     monkeypatch.setenv('HDX_KEY', my_test_key)
     facade(my_testkeyfn, hdx_key='aaaa', user_agent=my_user_agent, **config_files)
     assert testresult.actual_result == my_test_key

     # Site passed as an argument.
     reset()
     my_test_hdxsite = 'test'
     facade(my_testfn, hdx_site=my_test_hdxsite, user_agent=my_user_agent, **config_files)
     assert testresult.actual_result == 'https://%s-data.humdata.org/' % my_test_hdxsite

     # Expected: HDX_SITE from the environment wins over the argument.
     reset()
     monkeypatch.setenv('HDX_SITE', my_test_hdxsite)
     facade(my_testfn, hdx_site='feature', user_agent=my_user_agent, **config_files)
     assert testresult.actual_result == 'https://%s-data.humdata.org/' % my_test_hdxsite

     # Expected: HDX_URL from the environment is used as the URL. The result
     # is reset here as in every other step, so the assertion cannot depend
     # on a value left over from the previous step.
     reset()
     my_test_hdxurl = 'http://other-data.humdata.org'
     monkeypatch.setenv('HDX_URL', my_test_hdxurl)
     facade(my_testfn, hdx_site='feature', user_agent=my_user_agent, **config_files)
     assert testresult.actual_result == my_test_hdxurl

     UserAgent.clear_global()
예제 #7
0
    results[len(results) - 1].extend(rows)

    dataset, resources, showcase = generate_dataset_resources_and_showcase(
        pop_types, today)
    folder = gettempdir()
    file_to_upload = None
    for i, _ in enumerate(results):
        resource = resources[i]
        file_to_upload = join(folder, resource['name'])
        write_list_to_csv(results[i], file_to_upload, headers=headers[i])
        resource.set_file_to_upload(file_to_upload)
    dataset.add_update_resources(resources)
    dataset.update_from_yaml()
    #    dataset.create_in_hdx()
    for resource in dataset.get_resources():
        name = resource['name'].lower()
        if 'figures' in name and 'disagg' not in name:
            logger.info('Updating key figures datastore for %s' % name)


#            resource.update_datastore_for_topline(path=file_to_upload)
#    showcase.create_in_hdx()
#    showcase.add_dataset(dataset)

# Script entry point: hand main to facade, selecting the 'demo' HDX site.
if __name__ == '__main__':
    facade(
        main,
        hdx_site='demo',
        user_agent_config_yaml=join(expanduser('~'), '.useragents.yml'),
        user_agent_lookup='hdx-scraper-chathamhouse',
        project_config_yaml=join('config', 'project_configuration.yml'),
    )
예제 #8
0
        configuration = Configuration.read()
        countries_path = join('config', configuration['countries_filename'])
        indicators_url = configuration['indicators_url']
        mvam_url = configuration['mvam_url']
        showcase_url = configuration['showcase_url']
        countries = get_countries(countries_path, downloader)
        variables = get_mvamvariables(indicators_url, downloader)
        logger.info('Number of datasets to upload: %d' % len(countries))
        for info, country in progress_storing_tempdir('WFPFoodSecurity',
                                                      countries, 'iso3'):
            dataset, showcase, bites_disabled = \
                generate_dataset_and_showcase(mvam_url, showcase_url, downloader, info['folder'],
                                              country, variables)
            if dataset:
                dataset.update_from_yaml()
                dataset.generate_resource_view(bites_disabled=bites_disabled)
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script='HDX Scraper: WFP Food Security',
                    batch=info['batch'])
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)


# Script entry point: hand main to facade with the user agent and project
# configuration file locations.
if __name__ == '__main__':
    facade(
        main,
        user_agent_config_yaml=join(expanduser('~'), '.useragents.yml'),
        user_agent_lookup=lookup,
        project_config_yaml=join('config', 'project_configuration.yml'),
    )
예제 #9
0
 def test_exception(self, hdx_key_file, project_config_yaml):
     """Expect the ValueError from running my_excfn to escape from facade."""
     testresult.actual_result = None
     with pytest.raises(ValueError):
         facade(
             my_excfn,
             hdx_key_file=hdx_key_file,
             project_config_yaml=project_config_yaml,
         )
예제 #10
0
    sheet.update_values('A1', rows)
    logger.info('Longest activities: %d' % largest_activities)


# Script entry point. Each setting is taken from the command line when given;
# otherwise the matching environment variable is consulted.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Data Freshness Emailer')
    parser.add_argument(
        '-hk', '--hdx_key', default=None, help='HDX api key')
    parser.add_argument(
        '-ua', '--user_agent', default=None, help='user agent')
    parser.add_argument(
        '-pp', '--preprefix', default=None, help='preprefix')
    parser.add_argument(
        '-hs', '--hdx_site', default=None, help='HDX site to use')
    parser.add_argument(
        '-gs', '--gsheet_auth', default=None,
        help='Credentials for accessing Google Sheets')
    args = parser.parse_args()

    hdx_key = getenv('HDX_KEY') if args.hdx_key is None else args.hdx_key

    # Falls back to USER_AGENT, then to a fixed default name.
    user_agent = args.user_agent
    if user_agent is None:
        user_agent = getenv('USER_AGENT', 'crisis-casestudy')

    preprefix = (getenv('PREPREFIX')
                 if args.preprefix is None else args.preprefix)

    # 'prod' is used when neither the option nor HDX_SITE is present.
    hdx_site = (getenv('HDX_SITE', 'prod')
                if args.hdx_site is None else args.hdx_site)

    gsheet_auth = (getenv('GSHEET_AUTH')
                   if args.gsheet_auth is None else args.gsheet_auth)

    facade(
        main,
        hdx_key=hdx_key,
        user_agent=user_agent,
        preprefix=preprefix,
        hdx_site=hdx_site,
        gsheet_auth=gsheet_auth,
        project_config_yaml=join('config', 'project_configuration.yml'),
    )
예제 #11
0
 def test_facade(self, hdx_key_file, project_config_yaml):
     """Run my_testfn through facade and check the URL it records."""
     expected_url = 'https://test-data.humdata.org/'
     testresult.actual_result = None
     facade(
         my_testfn,
         hdx_key_file=hdx_key_file,
         project_config_yaml=project_config_yaml,
     )
     assert testresult.actual_result == expected_url
예제 #12
0
        # 'United arab emirates': 'AE',
        # 'United kingdom': 'GB', pas bon
        # 'United states': 'US', pas bon
        'Uruguay': 'UY',
        'Uzbekistan': 'UZ',
        'Vanuatu': 'VU',
        'Venezuela': 'VE',
        'Viet nam': 'VN',
        'Yemen': 'YE',
        'Zambia': 'ZM',
        'Zimbabwe': 'ZW'
    }
    for pays in countries:
        # dataset, showcase = generate_dataset_and_showcase(pays, countries[pays])
        # dataset.update_from_yaml()

        # dataset.create_in_hdx(hxl_update=False)
        # # dataset.create_in_hdx()
        # dataset.add_tag('INDICATORS')
        # showcase.create_in_hdx()

        # showcase.add_dataset(dataset)
        updateTag(countries[pays])


# Script entry point: hand main to facade, selecting the 'prod' HDX site.
if __name__ == '__main__':
    facade(
        main,
        hdx_site='prod',
        user_agent='HDXINTERNAL unhabitat scraper',
        project_config_yaml=join('config', 'project_configuration.yml'),
    )
예제 #13
0
            return resource_id, dataset_id, url, None, resource_issues['info'], resource_issues['warning'], \
                   resource_issues['error'], resource_issues['total'], resource_issues['toobig'], \
                   resource_issues['other']
        except Exception as exc:
            try:
                code = exc.code
            except AttributeError:
                code = ''
            err = 'Exception during hashing: code=%s message=%s raised=%s.%s url=%s' % (
                code, exc, exc.__class__.__module__,
                exc.__class__.__qualname__, url)
            raise type(exc)(err).with_traceback(sys.exc_info()[2])

    try:
        return await retry.send_http(
            session,
            'get',
            url,
            retries=1,
            interval=1,
            backoff=4,
            http_status_codes_to_retry=[429, 500, 502, 503, 504],
            fn=fn)
    except Exception as e:
        return resource_id, dataset_id, url, str(
            e), False, False, False, False, False, True


# Script entry point: hand main to facade, selecting the 'prod' HDX site.
if __name__ == '__main__':
    facade(
        main,
        user_agent='test',
        preprefix='HDXINTERNAL',
        hdx_site='prod',
    )
예제 #14
0
 def test_facade(self, hdx_key_file, project_config_yaml):
     """Check the URL recorded in testresult after facade runs my_testfn."""
     facade_kwargs = {'hdx_key_file': hdx_key_file,
                      'project_config_yaml': project_config_yaml}
     testresult.actual_result = None
     facade(my_testfn, **facade_kwargs)
     assert testresult.actual_result == 'https://test-data.humdata.org/'
예제 #15
0
    results = loop.run_until_complete(future)
    logger.info('Execution time: %s seconds' % (time.time() - start_time))
    print_results(results)


def run_grequests(last_modified_check):
    """Run the grequests-based last-modified check, log its duration and
    print the results.

    Args:
        last_modified_check: passed unchanged to
            grequests_check_resources_for_last_modified.
    """
    began = time.time()
    outcome = grequests_check_resources_for_last_modified(last_modified_check)
    elapsed = time.time() - began
    logger.info('Execution time: %s seconds' % elapsed)
    print_results(outcome)


def main(configuration):
    """Collect resource URLs from HDX and run the aiohttp check on them.

    Resources whose URL contains one of the excluded host strings are left
    out. The remaining (url, resource id) pairs are sorted and only the first
    NUMBER_OF_URLS_TO_PROCESS of them are checked.

    Args:
        configuration: passed to Resource.search_in_hdx.
    """
    excluded_hosts = (
        'data.humdata.org',
        'manage.hdx.rwlabs.org',
        'proxy.hxlstandard.org',
        'scraperwiki.com',
        'ourairports.com',
    )
    candidates = []
    for resource in Resource.search_in_hdx(configuration, 'name:'):
        resource_id, url = resource['id'], resource['url']
        if not any(host in url for host in excluded_hosts):
            candidates.append((url, resource_id))
    candidates.sort()
    # run_grequests is the alternative runner; the aiohttp one is used here.
    run_aiohttp(candidates[:NUMBER_OF_URLS_TO_PROCESS])


# Script entry point: hand main to facade against 'prod' with hdx_read_only.
if __name__ == '__main__':
    facade(
        main,
        hdx_site='prod',
        hdx_read_only=True,
    )
예제 #16
0
 def test_exception(self, hdx_key_file, project_config_yaml):
     """Running my_excfn through facade must raise ValueError."""
     facade_kwargs = {'hdx_key_file': hdx_key_file,
                      'project_config_yaml': project_config_yaml}
     testresult.actual_result = None
     with pytest.raises(ValueError):
         facade(my_excfn, **facade_kwargs)
예제 #17
0
import logging

from hdx.hdx_configuration import Configuration
from os.path import join

from hdx.facades.simple import facade

logger = logging.getLogger(__name__)


def main():
    """Placeholder entry point: prints a notice and does nothing else."""
    print('nothing to do yet')


# Script entry point: hand main to facade, selecting the 'demo' HDX site.
if __name__ == '__main__':
    facade(
        main,
        hdx_site='demo',
        user_agent='HDXINTERNAL OCHA-Philippines scraper',
    )
예제 #18
0
 def test_facade(self, monkeypatch, hdx_config_yaml, project_config_yaml):
     """Run facade under a sequence of argument/environment combinations.

     Before every facade call the global UserAgent is cleared and
     testresult.actual_result is reset to None, so each assertion can only be
     satisfied by the call made in that step. Environment variables set
     through monkeypatch are never removed inside the test, so they remain
     set for all later steps; the order of the steps matters.
     """
     shared = dict(hdx_config_yaml=hdx_config_yaml,
                   project_config_yaml=project_config_yaml)

     # Default site URL.
     UserAgent.clear_global()
     my_user_agent = 'test'
     testresult.actual_result = None
     facade(my_testfn, user_agent=my_user_agent, **shared)
     assert testresult.actual_result == 'https://data.humdata.org'

     # User agent passed as an argument.
     UserAgent.clear_global()
     version = get_api_version()
     testresult.actual_result = None
     facade(my_testuafn, user_agent=my_user_agent, **shared)
     assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (
         version, my_user_agent)

     # User agent supplied only through the USER_AGENT environment variable.
     UserAgent.clear_global()
     testresult.actual_result = None
     my_user_agent = 'lala'
     monkeypatch.setenv('USER_AGENT', my_user_agent)
     facade(my_testuafn, **shared)
     assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (
         version, my_user_agent)

     # Expected: the USER_AGENT environment value wins over the argument.
     UserAgent.clear_global()
     testresult.actual_result = None
     facade(my_testuafn, user_agent='test', **shared)
     assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (
         version, my_user_agent)

     # Expected: PREPREFIX from the environment is prepended with a colon.
     UserAgent.clear_global()
     testresult.actual_result = None
     my_preprefix = 'haha'
     monkeypatch.setenv('PREPREFIX', my_preprefix)
     facade(my_testuafn, user_agent='test', **shared)
     assert testresult.actual_result == '%s:HDXPythonLibrary/%s-%s' % (
         my_preprefix, version, my_user_agent)

     # API key passed as an argument.
     UserAgent.clear_global()
     testresult.actual_result = None
     my_test_key = '1234'
     facade(my_testkeyfn,
            hdx_key=my_test_key,
            user_agent=my_user_agent,
            **shared)
     assert testresult.actual_result == my_test_key

     # Expected: HDX_KEY from the environment wins over the argument.
     UserAgent.clear_global()
     testresult.actual_result = None
     monkeypatch.setenv('HDX_KEY', my_test_key)
     facade(my_testkeyfn,
            hdx_key='aaaa',
            user_agent=my_user_agent,
            **shared)
     assert testresult.actual_result == my_test_key

     # Site passed as an argument.
     UserAgent.clear_global()
     testresult.actual_result = None
     my_test_hdxsite = 'test'
     facade(my_testfn,
            hdx_site=my_test_hdxsite,
            user_agent=my_user_agent,
            **shared)
     assert testresult.actual_result == 'https://%s-data.humdata.org' % my_test_hdxsite

     # Expected: HDX_SITE from the environment wins over the argument.
     UserAgent.clear_global()
     testresult.actual_result = None
     monkeypatch.setenv('HDX_SITE', my_test_hdxsite)
     facade(my_testfn,
            hdx_site='feature',
            user_agent=my_user_agent,
            **shared)
     assert testresult.actual_result == 'https://%s-data.humdata.org' % my_test_hdxsite

     # Expected: HDX_URL from the environment is used as the URL.
     UserAgent.clear_global()
     testresult.actual_result = None
     my_test_hdxurl = 'http://other-data.humdata.org'
     monkeypatch.setenv('HDX_URL', my_test_hdxurl)
     facade(my_testfn,
            hdx_site='feature',
            user_agent=my_user_agent,
            **shared)
     assert testresult.actual_result == my_test_hdxurl

     # Expected: an HDX_URL with a trailing slash yields the URL without it.
     # The reset is essential here: the previous step left actual_result
     # equal to my_test_hdxurl, so without it this assertion could pass on a
     # stale value even if facade never recorded a result.
     UserAgent.clear_global()
     testresult.actual_result = None
     my_test_hdxurl2 = 'http://other-data.humdata.org/'
     monkeypatch.setenv('HDX_URL', my_test_hdxurl2)
     facade(my_testfn,
            hdx_site='feature',
            user_agent=my_user_agent,
            **shared)
     assert testresult.actual_result == my_test_hdxurl

     UserAgent.clear_global()
예제 #19
0
from eca import generateDatasets

import logging

from hdx.hdx_configuration import Configuration
from os.path import join

from hdx.facades.simple import facade

logger = logging.getLogger(__name__)


def main():
    """Generate datasets with generateDatasets(32) and create each in HDX.

    Each dataset is updated from YAML before it is created.
    """
    for generated in generateDatasets(32):
        generated.update_from_yaml()
        # Required-field checking (ignoring 'notes') is currently disabled.
        generated.create_in_hdx()


# Script entry point: hand main to facade, selecting the 'test' HDX site.
if __name__ == '__main__':
    facade(
        main,
        hdx_site='test',
        user_agent='HDXINTERNAL UNECA scraper',
    )