Beispiel #1
0
def _parse_waf(context, repos, record, identifier):
    """Harvest the XML documents linked from a WAF index page.

    The page at ``record`` is fetched and parsed as HTML.  Every ``<a href>``
    that ends in ``.xml`` and carries no query string is resolved to a URL,
    fetched, and handed to ``_parse_metadata``.

    :param context: passed through to ``_parse_metadata``
    :param repos: passed through to ``_parse_metadata``
    :param record: URL of the WAF index page
    :param identifier: not used by this function
    :returns: list holding, for each link, the first object returned by
              ``_parse_metadata`` with ``source`` and ``mdsource`` set to
              the link URL
    :raises Exception: if the index page cannot be parsed
    """

    recobjs = []

    content = util.http_request('GET', record)

    LOGGER.debug(content)

    try:
        parser = etree.HTMLParser()
        tree = etree.fromstring(content, parser=parser)
    except Exception as err:
        raise Exception('Could not parse WAF: %s' % str(err))

    up = urlparse(record)
    links = []

    LOGGER.debug('collecting links')
    for link in tree.xpath('//a/@href'):
        link = link.strip()
        if not link:
            continue
        if '?' in link:
            continue
        # the suffix filter also rules out directory links (trailing '/')
        if not link.endswith('.xml'):
            LOGGER.debug('Skipping, not .xml')
            continue
        if link.startswith('/'):
            # host-relative path: keep only scheme and host of the WAF URL
            link = '%s://%s%s' % (up.scheme, up.netloc, link)
        elif '://' not in link:
            # relative href (bare file name or sub-path): tack on to WAF URL
            link = '%s/%s' % (record, link)
        # anything else is already an absolute URL and is used unchanged
        LOGGER.debug('URL is: %s', link)
        links.append(link)

    LOGGER.debug('%d links found', len(links))
    for link in links:
        LOGGER.debug('Processing link %s', link)
        # fetch and parse
        linkcontent = util.http_request('GET', link)
        recobj = _parse_metadata(context, repos, linkcontent)[0]
        recobj.source = link
        recobj.mdsource = link
        recobjs.append(recobj)

    return recobjs
Beispiel #2
0
def parse_record(context, record, repos=None,
                 mtype='http://www.opengis.net/cat/csw/2.0.2',
                 identifier=None, pagesize=10):
    """Parse metadata.

    When ``mtype`` is the CSW 2.0.2 namespace and ``record`` is a string
    starting with ``http``, the work is delegated to ``_parse_csw``.  If that
    raises an error whose text contains ``ExceptionReport``, a
    ``RuntimeError`` is raised.  On any other error ``record`` is fetched
    with an HTTP GET and parsed with ``_parse_dc``, whose result is returned
    in a one-element list.

    NOTE(review): only the CSW-service branch is visible in this excerpt;
    handling of other ``mtype`` values is not shown here.

    :param context: passed through to the ``_parse_*`` helpers
    :param record: a URL (for the branch shown here)
    :param repos: passed through to the ``_parse_*`` helpers
    :param mtype: namespace/type identifier selecting the parser
    :param identifier: record identifier; a ``urn:uuid:`` value is generated
                       when omitted
    :param pagesize: passed through to ``_parse_csw``
    :raises RuntimeError: on a CSW exception report or a failed HTTP fetch
    """

    if identifier is None:
        # UUID.urn exists on Python 2 and 3; get_urn() is Python 2 only
        identifier = uuid.uuid4().urn

    # parse web services
    if (mtype == 'http://www.opengis.net/cat/csw/2.0.2' and
            isinstance(record, str) and record.startswith('http')):
        LOGGER.debug('CSW service detected, fetching via HTTP')
        # CSW service, not csw:Record
        try:
            return _parse_csw(context, repos, record, identifier, pagesize)
        except Exception as err:
            # TODO: implement better exception handling
            # str(err) rather than err.message: the attribute is deprecated
            # and is not available on every exception
            if str(err).find('ExceptionReport') != -1:
                msg = 'CSW harvesting error: %s' % str(err)
                LOGGER.debug(msg)
                raise RuntimeError(msg)
            LOGGER.debug('Not a CSW, attempting to fetch Dublin Core')
            try:
                content = util.http_request('GET', record)
            except Exception as err:
                raise RuntimeError('HTTP error: %s' % str(err))
            return [_parse_dc(context, repos, etree.fromstring(content))]
Beispiel #3
0
def parse_record(context,
                 record,
                 repos=None,
                 mtype='http://www.opengis.net/cat/csw/2.0.2',
                 identifier=None,
                 pagesize=10):
    """Parse metadata.

    When ``mtype`` is the CSW 2.0.2 namespace and ``record`` is a string
    starting with ``http``, the work is delegated to ``_parse_csw``.  If that
    raises an error whose text contains ``ExceptionReport``, a
    ``RuntimeError`` is raised.  On any other error ``record`` is fetched
    with an HTTP GET and parsed with ``_parse_dc``, whose result is returned
    in a one-element list.

    NOTE(review): only the CSW-service branch is visible in this excerpt;
    handling of other ``mtype`` values is not shown here.

    :param context: passed through to the ``_parse_*`` helpers
    :param record: a URL (for the branch shown here)
    :param repos: passed through to the ``_parse_*`` helpers
    :param mtype: namespace/type identifier selecting the parser
    :param identifier: record identifier; a ``urn:uuid:`` value is generated
                       when omitted
    :param pagesize: passed through to ``_parse_csw``
    :raises RuntimeError: on a CSW exception report or a failed HTTP fetch
    """

    if identifier is None:
        # UUID.urn exists on Python 2 and 3; get_urn() is Python 2 only
        identifier = uuid.uuid4().urn

    # parse web services
    if (mtype == 'http://www.opengis.net/cat/csw/2.0.2'
            and isinstance(record, str) and record.startswith('http')):
        LOGGER.debug('CSW service detected, fetching via HTTP')
        # CSW service, not csw:Record
        try:
            return _parse_csw(context, repos, record, identifier, pagesize)
        except Exception as err:
            # TODO: implement better exception handling
            if str(err).find('ExceptionReport') != -1:
                msg = 'CSW harvesting error: %s' % str(err)
                LOGGER.debug(msg)
                raise RuntimeError(msg)
            LOGGER.debug('Not a CSW, attempting to fetch Dublin Core')
            try:
                content = util.http_request('GET', record)
            except Exception as err:
                raise RuntimeError('HTTP error: %s' % str(err))
            return [_parse_dc(context, repos, etree.fromstring(content))]
Beispiel #4
0
def _parse_waf(context, repos, record, identifier):
    """Fetch the WAF index page at ``record`` and parse it as HTML.

    :raises Exception: if the fetched content cannot be parsed
    """
    # NOTE(review): only the start of this function is visible in this
    # excerpt; ``recobjs`` and ``tree`` are not used by the lines shown.

    recobjs = []

    content = util.http_request('GET', record)

    LOGGER.debug(content)

    try:
        parser = etree.HTMLParser()
        tree = etree.fromstring(content, parser=parser)
    except Exception as err:
        raise Exception('Could not parse WAF: %s' % str(err))
Beispiel #5
0
def _parse_waf(context, repos, record, identifier):
    """Retrieve the WAF index page located at ``record`` and parse it as HTML.

    :raises Exception: when the retrieved content cannot be parsed
    """
    # NOTE(review): this excerpt stops right after the parsing step;
    # ``recobjs`` and ``tree`` are not consumed by the visible lines.

    recobjs = []

    content = util.http_request('GET', record)

    LOGGER.debug(content)

    try:
        parser = etree.HTMLParser()
        tree = etree.fromstring(content, parser=parser)
    except Exception as err:
        raise Exception('Could not parse WAF: %s' % str(err))
Beispiel #6
0
                    if os.path.splitext(sfile)[1] not in ['.xml', '.txt']:
                        break

                    if sfile == 'requests.txt':  # GET requests
                        filename = '%s%s%s' % (root, os.sep, sfile)
                        gets = csv.reader(open(filename))
                        for row in gets:
                            testfile = '%s%s%s' % (root, os.sep, sfile)
                            request = ','.join(row[1:]).replace('PYCSW_SERVER',
                                                                URL)
                            outfile = '%s%s' % (root.replace(os.sep, '_'),
                                                '_%s.xml' % row[0])
                            expected = 'expected%s%s' % (os.sep, outfile)
                            print '\n test %s:%s' % (testfile, row[0])

                            result = http_request('GET', request)

                            status = get_validity(expected, result, outfile)

                            if status == 1:
                                print '  passed'
                                PASSED += 1
                            elif status == 0:
                                print '  initialized'
                                INITED += 1
                            else:
                                print '  FAILED'
                                FAILED += 1

                            if LOGWRITER is not None:
                                LOGWRITER.writerow([URL, cfg,
Beispiel #7
0
    elif mtype == 'http://www.opengis.net/wms':  # WMS
        return _parse_wms(context, repos, record, identifier)
     
    elif mtype == 'http://www.opengis.net/wps/1.0.0':  # WPS
        return [_parse_wps(context, repos, record, identifier)]

    elif mtype == 'http://www.opengis.net/wfs':  # WFS
        return _parse_wfs(context, repos, record, identifier)

    elif mtype == 'http://www.opengis.net/wcs':  # WCS
        return _parse_wcs(context, repos, record, identifier)

    elif (mtype == 'http://www.opengis.net/cat/csw/csdgm' and
          record.startswith('http')):  # FGDC
        record = util.http_request('GET', record)

    # parse metadata records
    if isinstance(record, str):
        exml = etree.fromstring(record)
    else:  # already serialized to lxml
        if hasattr(record, 'getroot'):  # standalone document
            exml = record.getroot()
        else:  # part of a larger document
            exml = record

    root = exml.tag

    LOGGER.debug('Serialized metadata, parsing content model')

    if root == '{%s}MD_Metadata' % context.namespaces['gmd']:  # ISO
Beispiel #8
0
    elif mtype == 'http://www.opengis.net/wcs':  # WCS
        LOGGER.debug('WCS detected, fetching via OWSLib')
        return _parse_wcs(context, repos, record, identifier)

    elif mtype == 'http://www.opengis.net/sos/1.0':  # SOS 1.0.0
        LOGGER.debug('SOS 1.0.0 detected, fetching via OWSLib')
        return _parse_sos(context, repos, record, identifier, '1.0.0')

    elif mtype == 'http://www.opengis.net/sos/2.0':  # SOS 2.0.0
        LOGGER.debug('SOS 2.0.0 detected, fetching via OWSLib')
        return _parse_sos(context, repos, record, identifier, '2.0.0')

    elif (mtype == 'http://www.opengis.net/cat/csw/csdgm' and
          record.startswith('http')):  # FGDC
        LOGGER.debug('FGDC detected, fetching via HTTP')
        record = util.http_request('GET', record)

    return _parse_metadata(context, repos, record)

def _set(context, obj, name, value):
    ''' store value on obj under the attribute the core model maps name to '''
    mappings = context.md_core_model['mappings']
    attribute = mappings[name]
    setattr(obj, attribute, value)

def _parse_metadata(context, repos, record):
    """parse metadata formats

    The visible lines bind ``exml`` from ``record``: a ``str`` is parsed
    with ``etree.fromstring``; an object exposing ``getroot`` contributes
    its root.

    NOTE(review): this excerpt is cut off before the remaining cases and
    before ``exml`` is used; ``context`` and ``repos`` are not referenced in
    the lines shown.
    """

    # raw string input: parse it
    if isinstance(record, str):
        exml = etree.fromstring(record)
    else:  # already serialized to lxml
        if hasattr(record, 'getroot'):  # standalone document
            exml = record.getroot()
Beispiel #9
0
                    if os.path.splitext(sfile)[1] not in ['.xml', '.txt']:
                        break

                    if sfile == 'requests.txt':  # GET requests
                        filename = '%s%s%s' % (root, os.sep, sfile)
                        gets = csv.reader(open(filename))
                        for row in gets:
                            testfile = '%s%s%s' % (root, os.sep, sfile)
                            request = ','.join(row[1:]).replace(
                                'PYCSW_SERVER', URL)
                            outfile = '%s%s' % (root.replace(
                                os.sep, '_'), '_%s.xml' % row[0])
                            expected = 'expected%s%s' % (os.sep, outfile)
                            print '\n test %s:%s' % (testfile, row[0])

                            result = http_request('GET', request)

                            status = get_validity(expected, result, outfile,
                                                  force_id_mask)

                            if status == 1:
                                print '  passed'
                                PASSED += 1
                            elif status == 0:
                                print '  initialized'
                                INITED += 1
                            elif status == -1 and DATABASE == 'PostgreSQL':
                                print '  warning: possible collation issue'
                                WARNING += 1
                            else:
                                print '  FAILED'
Beispiel #10
0
    elif mtype == 'http://www.opengis.net/wcs':  # WCS
        LOGGER.debug('WCS detected, fetching via OWSLib')
        return _parse_wcs(context, repos, record, identifier)

    elif mtype == 'http://www.opengis.net/sos/1.0':  # SOS 1.0.0
        LOGGER.debug('SOS 1.0.0 detected, fetching via OWSLib')
        return _parse_sos(context, repos, record, identifier, '1.0.0')

    elif mtype == 'http://www.opengis.net/sos/2.0':  # SOS 2.0.0
        LOGGER.debug('SOS 2.0.0 detected, fetching via OWSLib')
        return _parse_sos(context, repos, record, identifier, '2.0.0')

    elif (mtype == 'http://www.opengis.net/cat/csw/csdgm'
          and record.startswith('http')):  # FGDC
        LOGGER.debug('FGDC detected, fetching via HTTP')
        record = util.http_request('GET', record)

    return _parse_metadata(context, repos, record)


def _set(context, obj, name, value):
    ''' look up the mapped attribute for name and assign value to it on obj '''
    target = context.md_core_model['mappings'][name]
    setattr(obj, target, value)


def _parse_metadata(context, repos, record):
    """parse metadata formats"""

    if isinstance(record, str):
        exml = etree.fromstring(record)
    else:  # already serialized to lxml
Beispiel #11
0
def parse_record(context, record, repos=None,
                 mtype='http://www.opengis.net/cat/csw/2.0.2',
                 identifier=None, pagesize=10):
    """Parse metadata, dispatching on ``mtype``.

    * CSW 2.0.2 with a string ``record`` starting with ``http``: delegate to
      ``_parse_csw``.  If that raises an error whose text contains
      ``ExceptionReport``, raise ``RuntimeError``.  On any other error, fetch
      ``record`` over HTTP and parse it with ``_parse_dc``.
    * WAF, WMS, WPS, WFS, WCS, SOS 1.0 / 2.0: delegate to the matching
      ``_parse_*`` helper (the WPS result is wrapped in a list).
    * FGDC (``.../csw/csdgm``) with a URL ``record``: replace ``record`` with
      the fetched content, then fall through.
    * Anything else: hand ``record`` to ``_parse_metadata``.

    :param context: passed through to the ``_parse_*`` helpers
    :param record: a service/document URL, or a metadata record accepted by
                   ``_parse_metadata``
    :param repos: passed through to the ``_parse_*`` helpers
    :param mtype: namespace/type identifier selecting the parser
    :param identifier: record identifier; a ``urn:uuid:`` value is generated
                       when omitted
    :param pagesize: passed through to ``_parse_csw``
    :returns: whatever the selected helper returns (a one-element list for
              the Dublin Core fallback and for WPS)
    :raises RuntimeError: on a CSW exception report or a failed HTTP fetch
                          in the Dublin Core fallback
    """

    if identifier is None:
        # UUID.urn exists on Python 2 and 3; get_urn() is Python 2 only
        identifier = uuid.uuid4().urn

    # parse web services
    if (mtype == 'http://www.opengis.net/cat/csw/2.0.2' and
            isinstance(record, str) and record.startswith('http')):
        LOGGER.debug('CSW service detected, fetching via HTTP')
        # CSW service, not csw:Record
        try:
            return _parse_csw(context, repos, record, identifier, pagesize)
        except Exception as err:
            # TODO: implement better exception handling
            if str(err).find('ExceptionReport') != -1:
                msg = 'CSW harvesting error: %s' % str(err)
                LOGGER.debug(msg)
                raise RuntimeError(msg)
            LOGGER.debug('Not a CSW, attempting to fetch Dublin Core')
            try:
                content = util.http_request('GET', record)
            except Exception as err:
                raise RuntimeError('HTTP error: %s' % str(err))
            return [_parse_dc(context, repos, etree.fromstring(content))]

    elif mtype == 'urn:geoss:waf':  # WAF
        LOGGER.debug('WAF detected, fetching via HTTP')
        return _parse_waf(context, repos, record, identifier)

    elif mtype == 'http://www.opengis.net/wms':  # WMS
        LOGGER.debug('WMS detected, fetching via OWSLib')
        return _parse_wms(context, repos, record, identifier)

    elif mtype == 'http://www.opengis.net/wps/1.0.0':  # WPS
        LOGGER.debug('WPS detected, fetching via OWSLib')
        return [_parse_wps(context, repos, record, identifier)]

    elif mtype == 'http://www.opengis.net/wfs':  # WFS
        LOGGER.debug('WFS detected, fetching via OWSLib')
        return _parse_wfs(context, repos, record, identifier)

    elif mtype == 'http://www.opengis.net/wcs':  # WCS
        LOGGER.debug('WCS detected, fetching via OWSLib')
        return _parse_wcs(context, repos, record, identifier)

    elif mtype == 'http://www.opengis.net/sos/1.0':  # SOS 1.0.0
        LOGGER.debug('SOS 1.0.0 detected, fetching via OWSLib')
        return _parse_sos(context, repos, record, identifier, '1.0.0')

    elif mtype == 'http://www.opengis.net/sos/2.0':  # SOS 2.0.0
        LOGGER.debug('SOS 2.0.0 detected, fetching via OWSLib')
        return _parse_sos(context, repos, record, identifier, '2.0.0')

    elif (mtype == 'http://www.opengis.net/cat/csw/csdgm' and
          hasattr(record, 'startswith') and
          record.startswith('http')):  # FGDC
        # the hasattr guard lets already-parsed (non-string) records fall
        # through to _parse_metadata instead of raising AttributeError
        LOGGER.debug('FGDC detected, fetching via HTTP')
        record = util.http_request('GET', record)

    return _parse_metadata(context, repos, record)