Exemple #1
0
def check_conesearch_sites(destdir=os.curdir,
                           verbose=True,
                           parallel=True,
                           url_list='default'):
    """
    Validate Cone Search Services.

    .. note::

        URLs are unescaped prior to validation.

        Only check queries with ``<testQuery>`` parameters.
        Does not perform meta-data and erroneous queries.

    Parameters
    ----------
    destdir : str, optional
        Directory to store output files. Will be created if does
        not exist. Existing files with these names will be deleted
        or replaced:

            * conesearch_good.json
            * conesearch_warn.json
            * conesearch_exception.json
            * conesearch_error.json

    verbose : bool, optional
        Print extra info to log.

    parallel : bool, optional
        Enable multiprocessing.

    url_list : list of string, optional
        Only check these access URLs against
        ``astroquery.vo_conesearch.validator.conf.conesearch_master_list``
        and ignore the others, which will not appear in output files.
        By default, check those in
        ``astroquery.vo_conesearch.validator.conf.conesearch_urls``.
        If `None`, check everything.

    Raises
    ------
    IOError
        Invalid destination directory.

    timeout
        URL request timed out.

    ValidationMultiprocessingError
        Multiprocessing failed.

    """
    if url_list == 'default':
        url_list = conf.conesearch_urls

    if (not isinstance(destdir, str) or len(destdir) == 0
            or os.path.exists(destdir) and not os.path.isdir(destdir)):
        raise IOError('Invalid destination directory')  # pragma: no cover

    if not os.path.exists(destdir):
        os.mkdir(destdir)

    # Output dir created by votable.validator
    out_dir = os.path.join(destdir, 'results')

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # Output files
    db_file = OrderedDict()
    db_file['good'] = os.path.join(destdir, 'conesearch_good.json')
    db_file['warn'] = os.path.join(destdir, 'conesearch_warn.json')
    db_file['excp'] = os.path.join(destdir, 'conesearch_exception.json')
    db_file['nerr'] = os.path.join(destdir, 'conesearch_error.json')

    # JSON dictionaries for output files
    js_tree = {}
    for key in db_file:
        js_tree[key] = VOSDatabase.create_empty()

        # Delete existing files, if any, to be on the safe side.
        # Else can cause confusion if program exited prior to
        # new files being written but old files are still there.
        if os.path.exists(db_file[key]):  # pragma: no cover
            os.remove(db_file[key])
            if verbose:
                log.info('Existing file {0} deleted'.format(db_file[key]))

    # Master VO database from registry. Silence all the warnings.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        js_mstr = VOSDatabase.from_registry(conf.conesearch_master_list,
                                            encoding='binary',
                                            show_progress=verbose)

    # Validate only a subset of the services.
    if url_list is not None:
        # Make sure URL is unique and fixed.
        if ASTROPY_LT_4_1:
            url_list = set(
                map(unescape_all, [
                    cur_url.encode('utf-8')
                    if isinstance(cur_url, str) else cur_url
                    for cur_url in url_list
                ]))
        else:
            url_list = set(
                map(unescape_all, [
                    cur_url if isinstance(cur_url, str) else cur_url
                    for cur_url in url_list
                ]))
        uniq_rows = len(url_list)
        url_list_processed = []  # To track if given URL is valid in registry
        if verbose:
            log.info('Only {0}/{1} site(s) are validated'.format(
                uniq_rows, len(js_mstr)))
    # Validate all services.
    else:
        uniq_rows = len(js_mstr)

    key_lookup_by_url = {}

    # Process each catalog in the registry.
    if ASTROPY_LT_4_1:
        for cur_key, cur_cat in js_mstr.get_catalogs():
            cur_url = cur_cat['url'].encode('utf-8')

            # Skip if:
            #   a. not a Cone Search service
            #   b. not in given subset, if any
            if ((cur_cat['cap_type'] != b'conesearch')
                    or (url_list is not None and cur_url not in url_list)):
                continue

            # Use testQuery to return non-empty VO table with max verbosity.
            testquery_pars = parse_cs(cur_cat['ivoid'], cur_cat['cap_index'])
            cs_pars_arr = [
                '{}={}'.format(key, testquery_pars[key]).encode('utf-8')
                for key in testquery_pars
            ]
            cs_pars_arr += [b'VERB=3']

            # Track the service.
            key_lookup_by_url[cur_url + b'&'.join(cs_pars_arr)] = cur_key
            if url_list is not None:
                url_list_processed.append(cur_url)
    else:
        for cur_key, cur_cat in js_mstr.get_catalogs():
            cur_url = cur_cat['url']

            # Skip if:
            #   a. not a Cone Search service
            #   b. not in given subset, if any
            if ((cur_cat['cap_type'] != 'conesearch')
                    or (url_list is not None and cur_url not in url_list)):
                continue

            # Use testQuery to return non-empty VO table with max verbosity.
            testquery_pars = parse_cs(cur_cat['ivoid'], cur_cat['cap_index'])
            cs_pars_arr = [
                '{}={}'.format(key, testquery_pars[key])
                for key in testquery_pars
            ]
            cs_pars_arr += ['VERB=3']

            # Track the service.
            key_lookup_by_url[cur_url + '&'.join(cs_pars_arr)] = cur_key
            if url_list is not None:
                url_list_processed.append(cur_url)

    # Give warning if any of the user given subset is not in the registry.
    if url_list is not None:
        url_list_skipped = url_list - set(url_list_processed)
        n_skipped = len(url_list_skipped)
        if n_skipped > 0:
            warn_str = '{0} not found in registry! Skipped:\n'.format(
                n_skipped)
            for cur_url in url_list_skipped:
                warn_str += '\t{0}\n'.format(cur_url)
            warnings.warn(warn_str, AstropyUserWarning)

    all_urls = list(key_lookup_by_url)
    timeout = data.conf.remote_timeout
    if ASTROPY_LT_4_1:
        map_args = [(out_dir, url, timeout) for url in all_urls]
    else:
        map_args = [(out_dir, url.encode('utf-8'), timeout)
                    for url in all_urls]

    # Validate URLs
    if parallel:
        pool = multiprocessing.Pool()
        try:
            mp_list = pool.map(_do_validation, map_args)
        except Exception as exc:  # pragma: no cover
            raise ValidationMultiprocessingError(
                'An exception occurred during parallel processing '
                'of validation results: {0}'.format(exc))
    else:
        mp_list = map(_do_validation, map_args)

    # Categorize validation results
    if ASTROPY_LT_4_1:
        for r in mp_list:
            db_key = r['out_db_name']
            cat_key = key_lookup_by_url[r.url]
            cur_cat = js_mstr.get_catalog(cat_key)
            _copy_r_to_cat(r, cur_cat)
            js_tree[db_key].add_catalog(cat_key, cur_cat)
    else:
        for r in mp_list:
            db_key = r['out_db_name']
            cat_key = key_lookup_by_url[r.url.decode('utf-8')]
            cur_cat = js_mstr.get_catalog(cat_key)
            _copy_r_to_cat(r, cur_cat)
            js_tree[db_key].add_catalog(cat_key, cur_cat)

    # Write to HTML
    html_subsets = result.get_result_subsets(mp_list, out_dir)
    html.write_index(html_subsets, all_urls, out_dir)
    if parallel:
        html_subindex_args = [(out_dir, html_subset, uniq_rows)
                              for html_subset in html_subsets]
        pool.map(_html_subindex, html_subindex_args)
    else:
        for html_subset in html_subsets:
            _html_subindex((out_dir, html_subset, uniq_rows))

    # Write to JSON
    n = {}
    n_tot = 0
    for key in db_file:
        n[key] = len(js_tree[key])
        n_tot += n[key]
        js_tree[key].to_json(db_file[key], overwrite=True)
        if verbose:
            log.info('{0}: {1} catalog(s)'.format(key, n[key]))

    # Checksum
    if verbose:
        log.info('total: {0} out of {1} catalog(s)'.format(n_tot, uniq_rows))

    if n['good'] == 0:  # pragma: no cover
        warnings.warn('No good sites available for Cone Search.',
                      AstropyUserWarning)
Exemple #2
0
def check_conesearch_sites(destdir=os.curdir, verbose=True, parallel=True,
                           url_list='default'):
    """
    Validate Cone Search Services.

    .. note::

        URLs are unescaped prior to validation.

        Only check queries with ``<testQuery>`` parameters.
        Does not perform meta-data and erroneous queries.

    Parameters
    ----------
    destdir : str, optional
        Directory to store output files. Will be created if does
        not exist. Existing files with these names will be deleted
        or replaced:

            * conesearch_good.json
            * conesearch_warn.json
            * conesearch_exception.json
            * conesearch_error.json

    verbose : bool, optional
        Print extra info to log.

    parallel : bool, optional
        Enable multiprocessing.

    url_list : list of string, optional
        Only check these access URLs against
        ``astroquery.vo_conesearch.validator.conf.conesearch_master_list``
        and ignore the others, which will not appear in output files.
        By default, check those in
        ``astroquery.vo_conesearch.validator.conf.conesearch_urls``.
        If `None`, check everything.

    Raises
    ------
    IOError
        Invalid destination directory.

    timeout
        URL request timed out.

    ValidationMultiprocessingError
        Multiprocessing failed.

    """
    if url_list == 'default':
        url_list = conf.conesearch_urls

    if (not isinstance(destdir, six.string_types) or len(destdir) == 0 or
            os.path.exists(destdir) and not os.path.isdir(destdir)):
        raise IOError('Invalid destination directory')  # pragma: no cover

    if not os.path.exists(destdir):
        os.mkdir(destdir)

    # Output dir created by votable.validator
    out_dir = os.path.join(destdir, 'results')

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # Output files
    db_file = OrderedDict()
    db_file['good'] = os.path.join(destdir, 'conesearch_good.json')
    db_file['warn'] = os.path.join(destdir, 'conesearch_warn.json')
    db_file['excp'] = os.path.join(destdir, 'conesearch_exception.json')
    db_file['nerr'] = os.path.join(destdir, 'conesearch_error.json')

    # JSON dictionaries for output files
    js_tree = {}
    for key in db_file:
        js_tree[key] = VOSDatabase.create_empty()

        # Delete existing files, if any, to be on the safe side.
        # Else can cause confusion if program exited prior to
        # new files being written but old files are still there.
        if os.path.exists(db_file[key]):  # pragma: no cover
            os.remove(db_file[key])
            if verbose:
                log.info('Existing file {0} deleted'.format(db_file[key]))

    # Master VO database from registry. Silence all the warnings.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        js_mstr = VOSDatabase.from_registry(
            conf.conesearch_master_list, encoding='binary',
            show_progress=verbose)

    # Validate only a subset of the services.
    if url_list is not None:
        # Make sure URL is unique and fixed.
        url_list = set(map(
            unescape_all,
            [cur_url.encode('utf-8') if isinstance(cur_url, str) else cur_url
             for cur_url in url_list]))
        uniq_rows = len(url_list)
        url_list_processed = []  # To track if given URL is valid in registry
        if verbose:
            log.info('Only {0}/{1} site(s) are validated'.format(uniq_rows,
                                                                 len(js_mstr)))
    # Validate all services.
    else:
        uniq_rows = len(js_mstr)

    key_lookup_by_url = {}

    # Process each catalog in the registry.
    for cur_key, cur_cat in js_mstr.get_catalogs():
        cur_url = cur_cat['url'].encode('utf-8')

        # Skip if:
        #   a. not a Cone Search service
        #   b. not in given subset, if any
        if ((cur_cat['cap_type'] != b'conesearch') or
                (url_list is not None and cur_url not in url_list)):
            continue

        # Use testQuery to return non-empty VO table with max verbosity.
        testquery_pars = parse_cs(cur_cat['ivoid'], cur_cat['cap_index'])
        cs_pars_arr = ['{}={}'.format(key, testquery_pars[key]).encode('utf-8')
                       for key in testquery_pars]
        cs_pars_arr += [b'VERB=3']

        # Track the service.
        key_lookup_by_url[cur_url + b'&'.join(cs_pars_arr)] = cur_key
        if url_list is not None:
            url_list_processed.append(cur_url)

    # Give warning if any of the user given subset is not in the registry.
    if url_list is not None:
        url_list_skipped = url_list - set(url_list_processed)
        n_skipped = len(url_list_skipped)
        if n_skipped > 0:
            warn_str = '{0} not found in registry! Skipped:\n'.format(
                n_skipped)
            for cur_url in url_list_skipped:
                warn_str += '\t{0}\n'.format(cur_url)
            warnings.warn(warn_str, AstropyUserWarning)

    all_urls = list(key_lookup_by_url)
    timeout = data.conf.remote_timeout
    map_args = [(out_dir, url, timeout) for url in all_urls]

    # Validate URLs
    if parallel:
        pool = multiprocessing.Pool()
        try:
            mp_list = pool.map(_do_validation, map_args)
        except Exception as exc:  # pragma: no cover
            raise ValidationMultiprocessingError(
                'An exception occurred during parallel processing '
                'of validation results: {0}'.format(exc))
    else:
        mp_list = map(_do_validation, map_args)

    # Categorize validation results
    for r in mp_list:
        db_key = r['out_db_name']
        cat_key = key_lookup_by_url[r.url]
        cur_cat = js_mstr.get_catalog(cat_key)
        _copy_r_to_cat(r, cur_cat)
        js_tree[db_key].add_catalog(cat_key, cur_cat)

    # Write to HTML
    html_subsets = result.get_result_subsets(mp_list, out_dir)
    html.write_index(html_subsets, all_urls, out_dir)
    if parallel:
        html_subindex_args = [(out_dir, html_subset, uniq_rows)
                              for html_subset in html_subsets]
        pool.map(_html_subindex, html_subindex_args)
    else:
        for html_subset in html_subsets:
            _html_subindex((out_dir, html_subset, uniq_rows))

    # Write to JSON
    n = {}
    n_tot = 0
    for key in db_file:
        n[key] = len(js_tree[key])
        n_tot += n[key]
        js_tree[key].to_json(db_file[key], overwrite=True)
        if verbose:
            log.info('{0}: {1} catalog(s)'.format(key, n[key]))

    # Checksum
    if verbose:
        log.info('total: {0} out of {1} catalog(s)'.format(n_tot, uniq_rows))

    if n['good'] == 0:  # pragma: no cover
        warnings.warn(
            'No good sites available for Cone Search.', AstropyUserWarning)