Example #1
0
    def _parseXSEED(self, data):
        """
        Parse a XML-SEED document into the temporary blockette structure.

        The children of the document root are treated as headers: the first
        one is the volume header, the second one holds the abbreviations and
        every following header tagged ``station_control_header`` describes
        one station. The parsed blockettes are collected in ``self.temp``
        and ``self._updateInternalSEEDStructure()`` is called afterwards.

        :type data: File pointer or StringIO object.
        :param data: Seekable object containing the XML-SEED document. It is
            rewound to position 0 before parsing.
        :raises IndexError: If the root has fewer than two headers or the
            first header has no children.
        """
        data.seek(0)
        root = xmlparse(data).getroot()
        xseed_version = root.get('version')
        # ``Element.getchildren()`` is deprecated in lxml and was removed
        # from the standard library's ElementTree; ``list(element)``,
        # indexing and plain iteration are the documented replacements.
        headers = list(root)
        # Set all temporary attributes.
        self.temp = {'volume': [], 'abbreviations': [], 'stations': []}
        # Parse volume which is assumed to be the first header. Only parse
        # its first child (blockette 10) and discard the rest.
        self.temp['volume'].append(
            self._parseXMLBlockette(headers[0][0], 'V', xseed_version))
        # Append all abbreviations.
        for blkt in headers[1]:
            self.temp['abbreviations'].append(
                self._parseXMLBlockette(blkt, 'A', xseed_version))
        # Append all stations.
        for control_header in headers[2:]:
            # Anything that is not a station control header is ignored.
            if control_header.tag != 'station_control_header':
                continue
            # Each station gets its own list of blockettes.
            self.temp['stations'].append([])
            for blkt in control_header:
                self.temp['stations'][-1].append(
                    self._parseXMLBlockette(blkt, 'S', xseed_version))
        # Update internal values.
        self._updateInternalSEEDStructure()
    def _parseXSEED(self, data):
        """
        Parse a XML-SEED document into the temporary blockette structure.

        The children of the document root are treated as headers: the first
        one is the volume header, the second one holds the abbreviations and
        every following header tagged ``station_control_header`` describes
        one station. The parsed blockettes are collected in ``self.temp``
        and ``self._updateInternalSEEDStructure()`` is called afterwards.

        :type data: File pointer or StringIO object.
        :param data: Seekable object containing the XML-SEED document. It is
            rewound to position 0 before parsing.
        :raises IndexError: If the root has fewer than two headers or the
            first header has no children.
        """
        data.seek(0)
        root = xmlparse(data).getroot()
        xseed_version = root.get('version')
        # ``Element.getchildren()`` is deprecated in lxml and was removed
        # from the standard library's ElementTree; ``list(element)``,
        # indexing and plain iteration are the documented replacements.
        headers = list(root)
        # Set all temporary attributes.
        self.temp = {'volume': [], 'abbreviations': [], 'stations': []}
        # Parse volume which is assumed to be the first header. Only parse
        # its first child (blockette 10) and discard the rest.
        self.temp['volume'].append(
            self._parseXMLBlockette(headers[0][0], 'V', xseed_version))
        # Append all abbreviations.
        for blkt in headers[1]:
            self.temp['abbreviations'].append(
                self._parseXMLBlockette(blkt, 'A', xseed_version))
        # Append all stations.
        for control_header in headers[2:]:
            # Anything that is not a station control header is ignored.
            if control_header.tag != 'station_control_header':
                continue
            # Each station gets its own list of blockettes.
            self.temp['stations'].append([])
            for blkt in control_header:
                self.temp['stations'][-1].append(
                    self._parseXMLBlockette(blkt, 'S', xseed_version))
        # Update internal values.
        self._updateInternalSEEDStructure()
Example #3
0
def iter_parse(path):
    """Yield one record per ``rdf:Description`` element found in *path*.

    Each record is a dict with two keys: ``'text'`` holds the content of the
    element's ``api:text`` child passed through ``_normalize_term``, and
    ``'polarity'`` holds the content of its ``api:polarity`` child converted
    to ``float``.

    :param path: Source handed to ``xmlparse`` unchanged.
    """
    document_root = xmlparse(path).getroot()
    # Prefix -> namespace URI map used by every lookup below.
    namespaces = dict(
        rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        api='http://sentic.net/api/',
    )

    for description in document_root.iterfind('.//rdf:Description',
                                              namespaces):
        # Read both raw values first so a missing child fails before any
        # normalization is attempted.
        raw_text = description.find('./api:text', namespaces).text
        raw_polarity = description.find('./api:polarity', namespaces).text
        record = {}
        record['text'] = _normalize_term(raw_text)
        record['polarity'] = float(raw_polarity)
        yield record
 def readFile(self):
     """
     Reads a file and writes everything it finds to self.channel_lists.

     ``self.file`` is parsed as XML. The root element must be tagged
     ``channel_lists``; each of its children that has children of its own
     is stored as a list of the texts of those children, keyed by the
     child's ``name`` attribute. If the name is already present in
     ``self.channel_lists``, the suffixes ``_2`` ... ``_99`` are tried in
     order; when all of them are taken the list is dropped.

     The method returns silently, leaving ``self.channel_lists`` untouched,
     if the file cannot be parsed or has a different root tag.

     :raises KeyError: If a non-empty list element has no ``name``
         attribute.
     """
     # Parse the file. Return if it could not be read. Only ``Exception``
     # is caught so that KeyboardInterrupt/SystemExit still propagate.
     try:
         root = xmlparse(self.file).getroot()
     except Exception:
         return
     # This is the last check. Otherwise it is just assumed to be a correct
     # xml file.
     if root.tag != 'channel_lists':
         return
     # Loop over each list. ``list(element)`` replaces the deprecated
     # ``getchildren()``.
     for channel in root:
         channels = list(channel)
         # Lists without any channels are skipped.
         if not channels:
             continue
         list_name = channel.attrib['name']
         channel_list = [item.text for item in channels]
         # Find a free name. The number of attempts is limited to 99 for
         # safety reasons.
         for _i in range(1, 100):
             if _i == 1:
                 cur_name = list_name
             else:
                 cur_name = '%s_%i' % (list_name, _i)
             # If the name is already taken try the next suffix.
             if cur_name in self.channel_lists:
                 continue
             self.channel_lists[cur_name] = channel_list
             break
 def readFile(self):
     """
     Reads a file and writes everything it finds to self.channel_lists.

     ``self.file`` is parsed as XML. The root element must be tagged
     ``channel_lists``; each of its children that has children of its own
     is stored as a list of the texts of those children, keyed by the
     child's ``name`` attribute. If the name is already present in
     ``self.channel_lists``, the suffixes ``_2`` ... ``_99`` are tried in
     order; when all of them are taken the list is dropped.

     The method returns silently, leaving ``self.channel_lists`` untouched,
     if the file cannot be parsed or has a different root tag.

     :raises KeyError: If a non-empty list element has no ``name``
         attribute.
     """
     # Parse the file. Return if it could not be read. Only ``Exception``
     # is caught so that KeyboardInterrupt/SystemExit still propagate.
     try:
         root = xmlparse(self.file).getroot()
     except Exception:
         return
     # This is the last check. Otherwise it is just assumed to be a correct
     # xml file.
     if root.tag != 'channel_lists':
         return
     # Loop over each list. ``list(element)`` replaces the deprecated
     # ``getchildren()``.
     for channel in root:
         channels = list(channel)
         # Lists without any channels are skipped.
         if not channels:
             continue
         list_name = channel.attrib['name']
         channel_list = [item.text for item in channels]
         # Find a free name. The number of attempts is limited to 99 for
         # safety reasons.
         for _i in range(1, 100):
             if _i == 1:
                 cur_name = list_name
             else:
                 cur_name = '%s_%i' % (list_name, _i)
             # If the name is already taken try the next suffix.
             if cur_name in self.channel_lists:
                 continue
             self.channel_lists[cur_name] = channel_list
             break
def get_mapa():
    """Download the marker list from rotadareciclagem.com.br and store it.

    The ``carregaEntidades`` endpoint is queried with a fixed bounding box,
    and every ``marker`` child of the returned XML root is saved through
    ``scraperwiki.sqlite.save`` with ``id`` as the unique key.
    """
    endpoint = "http://www.rotadareciclagem.com.br/site.html?method=carregaEntidades&"

    bounding_box = "latMax=27.293068543847625&lngMax=84.66230031250007&latMin=-50.46827383595759&lngMin=-161.851566875&zoomAtual=14"
    # Alternative query with a much smaller bounding box:
    # bounding_box = "latMax=-15.76194355063211&lngMax=-47.86326041431886&latMin=-15.80281903113728&lngMin=-47.98362851353151&zoomAtual=14"

    document = xmlparse(endpoint + bounding_box)

    for marker in document.getroot().findall('marker'):
        # The stored 'type' column comes from the marker's 'prefixo'
        # attribute and 'nome' from the marker's text content.
        record = {
            'lat': marker.get('lat'),
            'lng': marker.get('lng'),
            'id': marker.get('id'),
            'type': marker.get('prefixo'),
            'nome': marker.text,
        }
        scraperwiki.sqlite.save(['id'], record)
def get_mapa():
    """Fetch all map markers from rotadareciclagem.com.br and save them.

    One row per ``marker`` element of the response is written with
    ``scraperwiki.sqlite.save``, using the marker ``id`` as the unique key.
    """
    service = "http://www.rotadareciclagem.com.br/site.html?method=carregaEntidades&"

    query = "latMax=27.293068543847625&lngMax=84.66230031250007&latMin=-50.46827383595759&lngMin=-161.851566875&zoomAtual=14"
    # Alternative query with a much smaller bounding box:
    # query = "latMax=-15.76194355063211&lngMax=-47.86326041431886&latMin=-15.80281903113728&lngMin=-47.98362851353151&zoomAtual=14"

    response_root = xmlparse(service + query).getroot()

    # (stored column, marker attribute) pairs, in the order they are saved.
    attribute_columns = (
        ('lat', 'lat'),
        ('lng', 'lng'),
        ('id', 'id'),
        ('type', 'prefixo'),
    )

    for entry in response_root.findall('marker'):
        row = {}
        for column, attribute in attribute_columns:
            row[column] = entry.get(attribute)
        # The display name is the text content of the marker element.
        row['nome'] = entry.text
        scraperwiki.sqlite.save(['id'], row)
Example #8
0
def normalize(iterable):
    """
    Turn a MoTI XML document into a stream of ``Row`` records.

    *iterable* is wrapped with ``iterable_to_stream``, parsed as XML and run
    through the module-level XSLT ``transform``. One ``Row`` is yielded for
    every typed child of every ``observation`` in every
    ``observation-series`` of the transformed document.

    Series without a client station id, observations without a usable
    ``valid-time`` attribute, and variables without a numeric ``value``
    child are logged and skipped.

    :param iterable: Source data, handed to ``iterable_to_stream`` unchanged.
    :returns: Generator of ``Row`` objects with ``network_name`` set to
        ``'MoTIe'`` and ``lat``/``lon`` set to ``None``.
    """
    log.info('Starting MOTI data normalization')
    file_stream = iterable_to_stream(iterable)
    et = xmlparse(file_stream)
    et = transform(et)

    # Loop invariant, so it is looked up only once.
    tz = pytz.timezone('Canada/Pacific')

    obs_series = et.xpath("//observation-series")
    for series in obs_series:
        try:
            stn_id = series.xpath(
                        "./origin/id[@type='client']")[0].text.strip()
        except IndexError as e:
            log.error("Could not detect the station id: xpath search "
                      "'//observation-series/origin/id[@type='client']' "
                      "return no results", extra={'exception': e})
            continue

        members = series.xpath('./observation', namespaces=ns)
        for member in members:
            # get time and convert to datetime
            time = member.get('valid-time')
            if not time:
                log.warning("Could not find a valid-time attribute for this "
                            "observation")
                continue

            try:
                parsed = dateparse(time)
            except ValueError:
                log.warning('Unable to convert value to datetime',
                            extra={'time': time})
                continue

            # ``datetime.replace(tzinfo=<pytz zone>)`` attaches the zone's
            # historical LMT offset and throws away any offset contained in
            # the timestamp. Naive values are therefore localized and aware
            # values are converted instead.
            if parsed.tzinfo is None:
                date = tz.localize(parsed)
            else:
                date = parsed.astimezone(tz)

            for obs in member.iterchildren():
                variable_name = obs.get('type')
                if variable_name is None:
                    continue

                try:
                    value_element = obs.xpath('./value')[0]
                except IndexError:
                    log.warning("Could not find the actual value for "
                                "observation. xpath search './value' "
                                "returned no results",
                                extra={'variable_name': variable_name})
                    continue

                try:
                    value = float(value_element.text)
                except (TypeError, ValueError):
                    # TypeError covers an empty value element, whose text
                    # is None.
                    log.error("Could not convert value to a number. "
                              "Skipping this observation.",
                              extra={'value': value_element})
                    continue

                yield Row(time=date,
                          val=value,
                          variable_name=variable_name,
                          unit=value_element.get('units'),
                          network_name='MoTIe',
                          station_id=stn_id,
                          lat=None,
                          lon=None)
Example #9
0
# Standard module
import pytz
import logging

# Installed libraries
from pkg_resources import resource_filename
from lxml.etree import XSLT, parse as xmlparse
from dateutil.parser import parse as dateparse

# Local
from crmprtd import Row, iterable_to_stream


# Location of the MoTI stylesheet shipped with the crmprtd package data.
xsl = resource_filename('crmprtd', 'data/moti.xsl')
# The stylesheet is parsed and compiled once, when the module is imported.
transform = XSLT(xmlparse(xsl))
# Prefix -> namespace URI map for xpath queries.
ns = {
    'xsi': "http://www.w3.org/2001/XMLSchema-instance"
}
# Module-level logger.
log = logging.getLogger(__name__)


def normalize(iterable):
    log.info('Starting MOTI data normalization')
    file_stream = iterable_to_stream(iterable)
    et = xmlparse(file_stream)
    et = transform(et)

    obs_series = et.xpath("//observation-series")
    for series in obs_series:
        try:
Example #10
0
 def prepare(self):
   """Parse the XML at ``self.path`` into ``self.doc``, call ``select()``
   and then run the parent class' ``prepare()``."""
   document = xmlparse(self.path)
   self.doc = document
   self.select()
   parent = super(XMLFileTransaction, self)
   parent.prepare()
Example #11
0
def normalize(file_stream):
    """
    Turn a MoTI XML document into a stream of ``Row`` records.

    *file_stream* is parsed as XML and run through the module-level XSLT
    ``transform``. One ``Row`` is yielded for every typed child of every
    ``observation`` in every ``observation-series`` of the transformed
    document, with its time converted to UTC.

    Empty series, series without a client station id, observations without
    a usable ``valid-time`` attribute, and variables without a numeric
    ``value`` child are logged and skipped.

    :param file_stream: Source handed to ``xmlparse`` unchanged.
    :returns: Generator of ``Row`` objects with ``network_name`` set to
        ``"MoTIe"`` and ``lat``/``lon`` set to ``None``.
    """
    log.info("Starting MOTI data normalization")
    et = xmlparse(file_stream)
    et = transform(et)
    obs_series = et.xpath("//observation-series")
    for series in obs_series:
        # An element's length is its number of children.
        if not len(series):
            log.warning("Empty observation series: xpath search "
                        "'//observation-series' return no results")
            continue
        try:
            stn_id = series.xpath(
                "./origin/id[@type='client']")[0].text.strip()
        except IndexError as e:
            log.error(
                "Could not detect the station id: xpath search "
                "'//observation-series/origin/id[@type='client']' "
                "return no results",
                extra={"exception": e},
            )
            continue

        members = series.xpath("./observation", namespaces=ns)
        for member in members:
            # get time and convert to datetime
            time = member.get("valid-time")
            if not time:
                log.warning("Could not find a valid-time attribute for this "
                            "observation")
                continue

            try:
                # MoTI gives us an ISO formatted time string with
                # timezone info attached so it should be sufficient to
                # simply parse it and display it as UTC.
                date = dateparse(time).astimezone(pytz.utc)
            except ValueError:
                log.warning("Unable to convert value to datetime",
                            extra={"time": time})
                continue

            for obs in member.iterchildren():
                variable_name = obs.get("type")
                if variable_name is None:
                    continue

                try:
                    value_element = obs.xpath("./value")[0]
                except IndexError:
                    log.warning(
                        "Could not find the actual value for "
                        "observation. xpath search './value' "
                        "returned no results",
                        extra={"variable_name": variable_name},
                    )
                    continue

                try:
                    value = float(value_element.text)
                except (TypeError, ValueError):
                    # TypeError covers an empty value element, whose text
                    # is None.
                    log.error(
                        "Could not convert value to a number. "
                        "Skipping this observation.",
                        extra={"value": value_element},
                    )
                    continue

                yield Row(
                    time=date,
                    val=value,
                    variable_name=variable_name,
                    unit=value_element.get("units"),
                    network_name="MoTIe",
                    station_id=stn_id,
                    lat=None,
                    lon=None,
                )
Example #12
0
#!/usr/bin/env python

# Standard module
import pytz
import logging

# Installed libraries
from pkg_resources import resource_filename
from lxml.etree import XSLT, parse as xmlparse
from dateutil.parser import parse as dateparse

# Local
from crmprtd import Row

# Location of the MoTI stylesheet shipped with the crmprtd package data.
xsl = resource_filename("crmprtd", "data/moti.xsl")
# The stylesheet is parsed and compiled once, when the module is imported.
transform = XSLT(xmlparse(xsl))
# Prefix -> namespace URI map for xpath queries.
ns = {"xsi": "http://www.w3.org/2001/XMLSchema-instance"}
# Module-level logger.
log = logging.getLogger(__name__)


def normalize(file_stream):
    log.info("Starting MOTI data normalization")
    et = xmlparse(file_stream)
    et = transform(et)
    obs_series = et.xpath("//observation-series")
    for series in obs_series:
        if not len(series):
            log.warning("Empty observation series: xpath search "
                        "'//observation-series' return no results")
            continue
        try: