Example #1
0
def _parse_site_info(site_info, namespace):
    """builds a plain dict describing a site from an etree siteInfo element

    ``namespace`` is the tag prefix put in front of every child tag that is
    looked up. The result always carries 'code', 'name' and 'network';
    'agency', 'location', 'timezone_info', 'elevation_m', 'notes' and
    'site_property' are only added when the matching attribute or child
    element is present (and, for agency/notes/properties, non-empty).
    """
    code_element = site_info.find(namespace + "siteCode")
    name_element = site_info.find(namespace + "siteName")

    site = {
        'code': code_element.text,
        'name': name_element.text,
        'network': code_element.attrib.get('network'),
    }

    agency_code = code_element.attrib.get('agencyCode')
    if agency_code:
        site['agency'] = agency_code

    # nested lookup: <ns>geoLocation/<ns>geogLocation
    location_path = namespace + "geoLocation/" + namespace + "geogLocation"
    location_element = site_info.find(location_path)
    if location_element is not None:
        site['location'] = _parse_geog_location(location_element, namespace)

    tz_element = site_info.find(namespace + "timeZoneInfo")
    if tz_element is not None:
        site['timezone_info'] = _parse_timezone_info(tz_element, namespace)

    elevation_element = site_info.find(namespace + 'elevation_m')
    if elevation_element is not None:
        site['elevation_m'] = elevation_element.text

    # WaterML 1.0 notes, keyed by their (space-stripped, underscored) title
    notes = {}
    for note in site_info.findall(namespace + 'note'):
        note_title = note.attrib['title'].replace(' ', '')
        notes[util.camel_to_underscore(note_title)] = note.text
    if notes:
        site['notes'] = notes

    # WaterML 1.1 siteProperties, keyed by their (space-stripped,
    # underscored) name
    properties = {}
    for site_property in site_info.findall(namespace + 'siteProperty'):
        property_name = site_property.attrib['name'].replace(' ', '')
        properties[util.camel_to_underscore(property_name)] = \
            site_property.text
    if properties:
        site['site_property'] = properties

    return site
Example #2
0
def _parse_site_info(site_info, namespace):
    """returns a dict describing the site held in an etree siteInfo element

    Every child tag is looked up with ``namespace`` prepended. 'code', 'name'
    and 'network' are always present in the result; the remaining keys
    ('agency', 'location', 'timezone_info', 'elevation_m', 'notes',
    'site_property') appear only when the source element provides them.
    """
    def _child(tag):
        # first direct child with the namespaced tag, or None
        return site_info.find(namespace + tag)

    site_code = _child("siteCode")
    parsed = {
        'code': site_code.text,
        'name': _child("siteName").text,
        'network': site_code.attrib.get('network'),
    }

    agency = site_code.attrib.get('agencyCode')
    if agency:
        parsed['agency'] = agency

    # geogLocation lives one level down, inside geoLocation
    geog_location = site_info.find(
        "".join([namespace, "geoLocation/", namespace, "geogLocation"]))
    if geog_location is not None:
        parsed['location'] = _parse_geog_location(geog_location, namespace)

    timezone_info = _child("timeZoneInfo")
    if timezone_info is not None:
        parsed['timezone_info'] = _parse_timezone_info(
            timezone_info, namespace)

    elevation_m = _child('elevation_m')
    if elevation_m is not None:
        parsed['elevation_m'] = elevation_m.text

    # WaterML 1.0 notes: title attribute (spaces removed) -> note text
    notes = {
        util.camel_to_underscore(note.attrib['title'].replace(' ', '')):
            note.text
        for note in site_info.findall(namespace + 'note')
    }
    if notes:
        parsed['notes'] = notes

    # WaterML 1.1 siteProperties: name attribute (spaces removed) -> text
    site_properties = {
        util.camel_to_underscore(prop.attrib['name'].replace(' ', '')):
            prop.text
        for prop in site_info.findall(namespace + 'siteProperty')
    }
    if site_properties:
        parsed['site_property'] = site_properties

    return parsed
Example #3
0
def _element_dict_attribute_name(attribute_name, element_name,
        prepend_element_name=True):
    """returns the dict key to use for an attribute of an element

    The part of ``attribute_name`` after the last '}' is run through
    util.camel_to_underscore. That name is returned unchanged when it already
    starts with ``element_name`` or when ``prepend_element_name`` is false;
    otherwise ``element_name`` plus an underscore is put in front of it.
    """
    bare_name = util.camel_to_underscore(attribute_name.rsplit('}', 1)[-1])
    already_prefixed = bare_name.startswith(element_name)
    if prepend_element_name and not already_prefixed:
        return element_name + '_' + bare_name
    return bare_name
Example #4
0
def _element_dict(element, exclude_children=None, prepend_attributes=True):
    """generic converter from an element to a flat dict whose keys are the
    CamelCase tag and attribute names converted to underscores; meant for
    cases where no special parsing is needed. In most cases you will want to
    update another dict with the result.

    - ``None`` input gives an empty dict.
    - A childless element with text contributes ``{tag_name: text}``.
    - Attributes whose value starts with an 'xsd' or 'xsi' prefix (the part
      before the first ':') are skipped. If ``prepend_attributes`` is True
      (default) the remaining attribute names are prefixed with the
      element's tag name unless they already start with it.
    - Children whose bare tag is not in ``exclude_children`` are converted
      recursively and merged in; the recursive call uses the default
      arguments.

    Note: does not handle sibling elements -- children producing the same
    key overwrite one another.
    """
    if element is None:
        return {}

    skipped_tags = [] if exclude_children is None else exclude_children

    tag_name = util.camel_to_underscore(element.tag.split('}')[-1])
    result = {}

    has_no_children = len(element) == 0
    if has_no_children and element.text is not None:
        result[tag_name] = element.text

    for attr_name, attr_value in element.attrib.items():
        # drop schema bookkeeping such as values starting with 'xsd:'
        if attr_value.split(':')[0] in ['xsd', 'xsi']:
            continue
        attr_key = _element_dict_attribute_name(
            attr_name, tag_name, prepend_element_name=prepend_attributes)
        result[attr_key] = attr_value

    for child in element.iterchildren():
        child_tag = child.tag.split('}')[-1]
        if child_tag not in skipped_tags:
            result.update(_element_dict(child))

    return result
Example #5
0
def _service_dict(service_info):
    """converts a service info record into a dict with normalised keys

    ``service_info`` is passed to ``dict()``. Each key goes through
    util.camel_to_underscore and each value through _cast_if_text; a fixed
    set of awkward keys is then renamed to friendlier names.
    """
    renames = (
        # (key after underscoring, key to expose)
        ('aabstract', 'abstract'),
        ('maxx', 'max_x'),
        ('maxy', 'max_y'),
        ('minx', 'min_x'),
        ('miny', 'min_y'),
        ('orgwebsite', 'organization_website'),
        ('serv_url', 'service_url'),
        ('sitecount', 'site_count'),
        ('valuecount', 'value_count'),
        ('variablecount', 'variable_count'),
    )

    converted = {}
    for raw_key, raw_value in dict(service_info).items():
        clean_key = util.camel_to_underscore(raw_key)
        converted[clean_key] = _cast_if_text(raw_value)

    for stale_key, wanted_key in renames:
        if stale_key in converted:
            converted[wanted_key] = converted.pop(stale_key)

    return converted
def _series_dict(series_info):
    """converts a series info record into a series info dict

    ``series_info`` is passed to ``dict()``. Each key goes through
    util.camel_to_underscore and each value through core._cast_if_text; the
    keys listed in ``change_keys`` are then renamed.
    """
    change_keys = [
        #(old_key, new_key)
        ('aabstract', 'abstract'),
        ('maxx', 'max_x'),
        ('maxy', 'max_y'),
        ('minx', 'min_x'),
        ('miny', 'min_y'),
        ('orgwebsite', 'organization_website'),
        ('serv_url', 'service_url'),
        ('sitecount', 'site_count'),
        ('valuecount', 'value_count'),
        ('variablecount', 'variable_count'),
    ]

    # .items() rather than .iteritems(): the latter does not exist on
    # Python 3 dicts, while .items() works on both major versions
    series_dict = dict([
        (util.camel_to_underscore(key), core._cast_if_text(value))
        for key, value in dict(series_info).items()
    ])

    for old_key, new_key in change_keys:
        if old_key in series_dict:
            series_dict[new_key] = series_dict.pop(old_key)

    return series_dict
Example #7
0
def _element_dict_attribute_name(attribute_name, element_name,
        prepend_element_name=True):
    """picks the key under which an element's attribute is stored

    Anything up to and including the last '}' in ``attribute_name`` is
    dropped and the rest is converted with util.camel_to_underscore. The
    element name and an underscore are prepended only when
    ``prepend_element_name`` is true and the converted name does not already
    start with ``element_name``.
    """
    local_name = attribute_name.rpartition('}')[2]
    attribute_only = util.camel_to_underscore(local_name)
    starts_with_element = attribute_only.startswith(element_name)
    if starts_with_element:
        return attribute_only
    if not prepend_element_name:
        return attribute_only
    return '_'.join([element_name, attribute_only])
Example #8
0
def _element_dict(element, exclude_children=None, prepend_attributes=True):
    """converts an element to a dict representation with CamelCase tag names and
    attributes converted to underscores; this is a generic converter for cases
    where special parsing isn't necessary.  In most cases you will want to
    update with this dict. If prepend_attributes is True (default), then
    attribute names are prepended with the element's tag name unless they
    already start with it.

    Returns an empty dict when ``element`` is None. Children whose bare tag
    name is listed in ``exclude_children`` are skipped; all other children
    are converted recursively (with default arguments) and merged in.
    Attributes whose value starts with an 'xsd' or 'xsi' prefix are ignored.

    Note: does not handle sibling elements
    """
    if element is None:
        return {}

    if exclude_children is None:
        exclude_children = []

    element_dict = {}
    element_name = util.camel_to_underscore(element.tag.split('}')[-1])

    # only leaf elements contribute their text
    if len(element) == 0 and element.text is not None:
        element_dict[element_name] = element.text

    # .items() instead of .iteritems(): plain dicts lost .iteritems() in
    # Python 3, .items() is available everywhere
    element_dict.update(dict([
        (_element_dict_attribute_name(key, element_name,
            prepend_element_name=prepend_attributes), value)
        for key, value in element.attrib.items()
        if value.split(':')[0] not in ['xsd', 'xsi']
    ]))

    for child in element.iterchildren():
        if child.tag.split('}')[-1] not in exclude_children:
            element_dict.update(_element_dict(child))

    return element_dict
Example #9
0
def _parse_metadata(values_element, metadata_elements, namespace):
    """collects metadata child elements of a values element into dicts

    ``metadata_elements`` is an iterable of (tag, collection_name, key)
    triples. For each triple every ``namespace + tag`` child of
    ``values_element`` is converted with _element_dict and _scrub_prefix; if
    at least one converted item is non-empty, the items are stored under
    ``collection_name`` as a dict keyed by ``item[key]``.
    """
    parsed = {}
    for tag, collection_name, key in metadata_elements:
        prefix = util.camel_to_underscore(tag)

        items = []
        for element in values_element.findall(namespace + tag):
            # NOTE(review): namespace is passed as _element_dict's second
            # positional argument -- confirm that is the intended parameter
            items.append(
                _scrub_prefix(_element_dict(element, namespace), prefix))

        populated = sum(1 for item in items if len(item))
        if not populated:
            continue

        # NOTE(review): every item is indexed here, including empty ones
        # when the collection is only partly empty -- confirm acceptable
        by_key = {}
        for item in items:
            by_key[item[key]] = item
        parsed[collection_name] = by_key
    return parsed
Example #10
0
def parse_site_values(content_io, namespace, query_isodate=None):
    """parses values out of a waterml file; content_io should be a file-like
    object

    Returns a dict with one entry per timeSeries element, keyed by the
    variable code (with ':' and the statistic code appended when the variable
    has a statistic). Each entry holds 'site', 'values' and 'variable', any
    metadata collections found under the values element, and 'last_refresh'
    when ``query_isodate`` is given.
    """
    metadata_elements = [
        # (element name, name of collection,
        #   key from element dict to use as for a key in the collections dict)
        ('censorCode', 'censor_codes', 'censor_code'),
        ('method', 'methods', 'id'),
        ('offset', 'offsets', 'id'),
        ('qualifier', 'qualifiers', 'id'),
        ('qualityControlLevel', 'quality_control_levels', 'id'),
        ('source', 'sources', 'id')
    ]

    data_dict = {}
    for _event, ele in etree.iterparse(content_io):
        if ele.tag != namespace + "timeSeries":
            continue

        site_info = _parse_site_info(
            ele.find(namespace + 'sourceInfo'), namespace)
        values_element = ele.find(namespace + 'values')
        values = _parse_values(values_element, namespace)
        variable = _parse_variable(
            ele.find(namespace + 'variable'), namespace)

        code = variable['code']
        if 'statistic' in variable:
            code = code + ":" + variable['statistic']['code']

        entry = {
            'site': site_info,
            'values': values,
            'variable': variable,
        }
        data_dict[code] = entry

        for tag, collection_name, key in metadata_elements:
            underscored_tag = util.camel_to_underscore(tag)
            collection = []
            for element in values_element.findall(namespace + tag):
                collection.append(_scrub_prefix(
                    _element_dict(element, namespace), underscored_tag))
            if len(collection):
                entry[collection_name] = dict(
                    (item[key], item) for item in collection)

        if query_isodate:
            entry['last_refresh'] = query_isodate

    return data_dict
Example #11
0
def parse_site_values(content_io, namespace, query_isodate=None):
    """parses values out of a waterml file; content_io should be a file-like
    object

    Returns a dict with one entry per timeSeries element, keyed by the
    variable code (with ':' and the statistic code appended when the variable
    has a statistic). Each entry holds 'site', 'values' and 'variable', plus
    one dict per metadata collection that has at least one non-empty item,
    and 'last_refresh' when ``query_isodate`` is given.
    """
    data_dict = {}
    metadata_elements = [
        # (element name, name of collection,
        #   key from element dict to use as for a key in the collections dict)
        ('censorCode', 'censor_codes', 'censor_code'),
        ('method', 'methods', 'id'),
        ('offset', 'offsets', 'id'),
        ('qualifier', 'qualifiers', 'id'),
        ('qualityControlLevel', 'quality_control_levels', 'id'),
        ('source', 'sources', 'id')
    ]
    for (event, ele) in etree.iterparse(content_io):
        if ele.tag == namespace + "timeSeries":
            source_info_element = ele.find(namespace + 'sourceInfo')
            site_info = _parse_site_info(source_info_element, namespace)
            values_element = ele.find(namespace + 'values')
            values = _parse_values(values_element, namespace)
            var_element = ele.find(namespace + 'variable')
            variable = _parse_variable(var_element, namespace)

            code = variable['code']
            if 'statistic' in variable:
                code += ":" + variable['statistic']['code']
            data_dict[code] = {
                'site': site_info,
                'values': values,
                'variable': variable,
            }

            for tag, collection_name, key in metadata_elements:
                underscored_tag = util.camel_to_underscore(tag)
                collection = [
                    _scrub_prefix(_element_dict(element, namespace),
                        underscored_tag)
                    for element in values_element.findall(namespace + tag)
                ]
                # any() instead of len(filter(...)): on Python 3 filter()
                # returns an iterator, which has no len()
                if any(len(item) for item in collection):
                    collection_dict = dict([
                        (item[key], item)
                        for item in collection
                    ])
                    data_dict[code][collection_name] = collection_dict

            if query_isodate:
                data_dict[code]['last_refresh'] = query_isodate

    return data_dict
Example #12
0
def _parse_metadata(values_element, metadata_elements, namespace):
    """gathers the metadata collections found under a values element

    For every (tag, collection_name, key) triple in ``metadata_elements`` the
    ``namespace + tag`` children of ``values_element`` are converted with
    _element_dict, cleaned with _scrub_prefix and, provided at least one of
    them is non-empty, stored under ``collection_name`` as a dict mapping
    ``item[key]`` to the item.
    """
    def _convert(element, prefix):
        # NOTE(review): namespace goes in as _element_dict's second
        # positional argument -- confirm that is the intended parameter
        return _scrub_prefix(_element_dict(element, namespace), prefix)

    metadata = {}
    for tag, collection_name, key in metadata_elements:
        underscored_tag = util.camel_to_underscore(tag)
        matches = values_element.findall(namespace + tag)
        collection = [_convert(match, underscored_tag) for match in matches]

        non_empty = [item for item in collection if len(item)]
        if non_empty:
            metadata[collection_name] = dict(
                (item[key], item) for item in collection)
    return metadata
Example #13
0
def _parse_series(series, namespace):
    """converts a series element into a dict

    The result always has a 'variable' entry produced by _parse_variable.
    For each tag in the fixed include list, the first matching
    ``namespace + tag`` child (if any) is converted with _element_dict and
    _scrub_prefix and stored under the underscored form of the element's
    tag.
    """
    wanted_tags = (
        'method',
        'Method',
        'source',
        'Source',
        'QualityControlLevel',
        'qualityControlLevel',
        'variableTimeInterval',
        'valueCount',
    )

    parsed = {
        'variable': _parse_variable(
            series.find(namespace + 'variable'), namespace),
    }

    for wanted_tag in wanted_tags:
        found = series.find(namespace + wanted_tag)
        if found is None:
            continue
        # NOTE(review): the key is derived from found.tag as-is, so any
        # namespace prefix in the tag ends up in the key -- confirm intended
        name = util.camel_to_underscore(found.tag)
        parsed[name] = _scrub_prefix(_element_dict(found), name)

    return parsed
Example #14
0
def _parse_series(series, namespace):
    """builds a dict describing one series element

    'variable' is filled from the ``namespace + 'variable'`` child via
    _parse_variable. Each optional child named in ``include_elements`` that
    is present is run through _element_dict and _scrub_prefix and added
    under the underscored version of its tag.
    """
    include_elements = [
        'method', 'Method',
        'source', 'Source',
        'QualityControlLevel', 'qualityControlLevel',
        'variableTimeInterval',
        'valueCount',
    ]

    variable_element = series.find(namespace + 'variable')
    series_dict = {}
    series_dict['variable'] = _parse_variable(variable_element, namespace)

    # look every candidate up in list order and keep only those that exist
    for include_element in include_elements:
        element = series.find(namespace + include_element)
        if element is not None:
            # NOTE(review): element.tag is used unmodified, so a namespaced
            # tag yields a namespaced key -- confirm intended
            name = util.camel_to_underscore(element.tag)
            series_dict[name] = _scrub_prefix(_element_dict(element), name)

    return series_dict