def get(self):
    # Load the current weather data.
    current_url = CURRENT_URL.format(self.config.location)
    with closing(urllib.urlopen(current_url)) as file_handle:
        current_data = parse_xml(file_handle)

    # Load the forecast data.
    forecast_url = FORECAST_URL.format(self.config.location)
    with closing(urllib.urlopen(forecast_url)) as file_handle:
        forecast_data = parse_xml(file_handle)

    # Keep the icons for the second through fourth forecast entries.
    forecast = []
    for day in forecast_data.find('simpleforecast').getchildren()[1:4]:
        forecast.append(day.find('icon').text)

    return {
        'current': {
            'weather': current_data.find('weather').text,
            'temperature': current_data.find('temp_c').text,
            # Strip the trailing '%'.
            'humidity': current_data.find('relative_humidity').text[:-1],
            'wind_direction': current_data.find('wind_dir').text,
            'wind_speed': str(round(
                float(current_data.find('wind_mph').text)
                * KILOMETER_MILES_RATIO, 1)),
            'pressure': current_data.find('pressure_mb').text,
            'visibility': current_data.find('visibility_km').text,
            'icon': current_data.find('icon').text,
        },
        'forecast': forecast,
    }
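# A minimal sketch of the module-level names get() relies on. The URL
# templates below are placeholders, not the original service endpoints;
# only KILOMETER_MILES_RATIO (kilometres per mile) is a fixed constant.
# The snippet targets Python 2, where urllib.urlopen still exists.
from contextlib import closing
import urllib
from xml.etree.ElementTree import parse as parse_xml  # assumed binding

CURRENT_URL = 'http://weather.example.com/current/{0}.xml'    # hypothetical
FORECAST_URL = 'http://weather.example.com/forecast/{0}.xml'  # hypothetical
KILOMETER_MILES_RATIO = 1.609344  # converts wind_mph to km/h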
def read_scheme(self):
    if not os.path.isfile(self.scheme_path):
        from . import MissingConfigurationDefinitionFile
        raise MissingConfigurationDefinitionFile(
            "Could not find %r." % self.scheme_path)

    tree = parse_xml(self.scheme_path)
    root = tree.getroot()
    scheme = ordereddict()

    for child in root.getchildren():
        option_name = child.tag
        attributes = dict(child.attrib)
        option_type = attributes.pop('type')

        if option_type.startswith('multioption'):
            # TODO: Hrm
            attributes['default'] = child.attrib.pop('default', None)
            attributes['options'] = unserialize_atomic(child, FIELD_TYPE_MAP)
        else:
            if not (
                FIELD_TYPE_MAP.get(option_type) in ('list', 'tuple', 'dict')
                and not child.getchildren()
            ):
                attributes['default'] = unserialize_atomic(child, FIELD_TYPE_MAP)

        scheme[option_name] = get_field(option_type)(**attributes)

    return scheme
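# A plausible scheme document for read_scheme() above. Tag names become
# option names and the 'type' attribute picks the field class via
# get_field(); the exact type vocabulary (FIELD_TYPE_MAP) and the child
# layout expected by unserialize_atomic() live elsewhere, so everything
# here is illustrative.
SAMPLE_SCHEME = """\
<scheme>
  <location type="unicode" default="London"/>
  <units type="multioption" default="metric">
    <option>metric</option>
    <option>imperial</option>
  </units>
</scheme>
"""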
def get_info(superevent):
    superevent_id = superevent['superevent_id']
    result = {'superevent_id': superevent_id}
    api = client.superevents[superevent_id]

    # Scan VOEvents in reverse order (newest to oldest).
    voevents = api.voevents.get()[::-1]
    for voevent in voevents:
        try:
            voevent_xml = parse_xml(api.files[voevent['filename']].get())
        except requests.exceptions.HTTPError as e:
            # Some VOEvents cannot be found because the files in GraceDB were
            # not exposed to the public. Skip them.
            if e.response.status_code == 404:
                warnings.warn(f'HTTP Error 404 for {voevent["filename"]}')
                continue
            else:
                raise

        # Get source classification and source properties.
        for group in ['Classification', 'Properties']:
            for key, value in get_params_for_group(voevent_xml, group).items():
                result.setdefault(key, value)

        # Look for a BAYESTAR sky map.
        elem = voevent_xml.find(".//Param[@name='skymap_fits']")
        if elem is not None and 'bayestar' in elem.attrib['value'].lower():
            skymap = get_skymap(elem.attrib['value'])
            for key, value in get_skymap_stats(skymap).items():
                result.setdefault(key, value)

    if all(key in result for key in ['Terrestrial', 'area(90)']):
        return result
    else:
        raise RuntimeError(f'Missing some information for {superevent_id}')
def __init__(self, path):
    self.root_namespace = Namespace()
    self.functions = []
    self.classes = []
    self.structs = []
    self.enums = []

    # Begin temporary variables; these only exist during import,
    # i.e. while __init__ runs.
    # A dict of {namespace: {function: [signatures]}}
    self.function_signatures = defaultdict(partial(defaultdict, list))
    self.qualtypes = []
    self.nodes = {}
    # End temporary variables

    doc = parse_xml(path)
    for xml_node in doc.xpath('/HeaderExport/Class'):
        self.parse_class(xml_node)
    for xml_node in doc.xpath('/HeaderExport/Struct'):
        self.parse_struct(xml_node)
    for xml_node in doc.xpath('/HeaderExport/Function'):
        self.parse_function(xml_node)
    for xml_node in doc.xpath('/HeaderExport/Enum'):
        self.parse_enumeration(xml_node)

    self.process_functions()
    self.process_contexts()
    self.process_types()
    self.process_bases()

    del self.nodes
def main():
    engine = create_engine('sqlite:///test.sqlite')
    session.bind = engine
    session.autoflush = False

    filename = sys.argv[1]
    xml = parse_xml(filename)

    ctx = {}
    for game in xml.xpath('/wild/game'):
        ctx['version'] = get_version(game.get('version'))
        # XXX region should be set based on the location
        ctx['region'] = ctx['version'].version_group.generation.main_region
        for loc in game.xpath('location'):
            ctx['location'] = get_or_create_location(loc, ctx)
            for area in loc.xpath('area'):
                ctx['area'] = create_area(area, ctx)
                if area.get('name', False):
                    print loc.get('name') + "/" + area.get('name')
                else:
                    print loc.get('name')
                encounters = list(reduce_encounters(area))
                insert_encounters(encounters, ctx)
                #for e in sorted(encounters,
                #                key=itemgetter('method', 'terrain')):
                #    print e

    session.flush()
    session.commit()
def from_xml(self, content):
    """
    Given some XML data, returns a Python dictionary of the decoded data.
    """
    if lxml is None:
        raise ImproperlyConfigured("Usage of the XML aspects requires lxml.")
    return self.from_etree(parse_xml(StringIO(content)).getroot())
def from_xml(self, content):
    """
    Given some XML data, returns a Python dictionary of the decoded data.
    """
    if lxml is None:
        raise UnsupportedDeserializationFormat(
            "Usage of the XML aspects requires lxml.")
    return self.from_etree(parse_xml(StringIO(content)).getroot())
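# The core of the from_xml() variants above, shown standalone: parse_xml is
# assumed to be lxml.etree.parse, and from_etree() (not shown here) walks
# the resulting element tree.
from io import StringIO
from lxml.etree import parse as parse_xml

root = parse_xml(StringIO("<object><name>demo</name></object>")).getroot()
print(root.tag)      # 'object'
print(root[0].text)  # 'demo'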
async def _preprocess_soap(req):
    from lxml.etree import fromstring as parse_xml
    body = await req.read()
    root = parse_xml(body)
    action = _find_element(root, 'Body/*[1]')
    return action
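# A minimal sketch of the _find_element() helper assumed above; the real
# one is defined elsewhere. SOAP envelopes are namespaced, so this version
# matches on local tag names and treats '*[1]' as "first child element".
def _find_element(root, path):
    node = root
    for step in path.split('/'):
        if step == '*[1]':
            node = node[0]
        else:
            node = next(child for child in node
                        if child.tag.rsplit('}', 1)[-1] == step)
    return node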
def htmlXPath(xpathExpr, body):
    try:
        xmlNode = parse_xml(StringIO(body), parser=HTMLParser()).getroot()
    except XMLSyntaxError:
        print(body)
        raise
    for result in xmlNode.xpath(xpathExpr):
        yield result
def htmlXPath(xpathExpr, body):
    try:
        xmlNode = parse_xml(BytesIO(body), parser=HTMLParser()).getroot()
    except XMLSyntaxError:
        print(body)
        raise
    for result in xmlNode.xpath(xpathExpr):
        yield result
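# Example use of the BytesIO variant of htmlXPath() above, with the lxml
# bindings the snippets appear to assume.
from io import BytesIO
from lxml.etree import parse as parse_xml, HTMLParser, XMLSyntaxError

page = b"<html><body><a href='/a'>A</a><a href='/b'>B</a></body></html>"
for href in htmlXPath('//a/@href', page):
    print(href)  # '/a', then '/b'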
def main():
    global base_url, ajax_url, page_param, page_num_of_article
    global page_start, page_step, page_encoding, page_sleep, count

    count = 0
    last_page = scraperwiki.sqlite.get_var('last_page', -1)
    latest_article = None
    start_over = False
    if last_page == -1:
        last_page = page_start
        latest_article = scraperwiki.sqlite.get_var('latest_article', None)
        start_over = True

    opener = urllib2.build_opener()
    opener.addheaders = [
        ('User-agent', 'Mozilla/5.0'),
        ('Referer', base_url),
        ('X-Requested-With', 'XMLHttpRequest'),
    ]
    urllib2.install_opener(opener)

    error_count = 0
    num_of_article = page_num_of_article
    while num_of_article == page_num_of_article:
        params[page_param] = last_page
        try:
            xml = scraperwiki.scrape(ajax_url, params)
        except urllib2.URLError, e:
            print 'Cannot reach the server:',
            if hasattr(e, 'reason'):
                print e.reason
            elif hasattr(e, 'code'):
                print e.code
            error_count += 1
            if error_count < 3:
                continue
            else:
                break

        # try:
        #     xml = xml.decode(page_encoding)
        # except UnicodeDecodeError:
        #     encoded = ''
        #     for word in xml.split(' '):
        #         try:
        #             encoded += word.decode(page_encoding) + ' '
        #         except UnicodeDecodeError:
        #             pass
        #     xml = encoded.rstrip()

        num_of_article = scrape(parse_xml(xml), latest_article, start_over)

        page = last_page / page_step
        if page_start == 0:
            page += 1
        scraperwiki.sqlite.save_var('last_page', last_page)
        print 'Page', page, ',', num_of_article, 'article(s)'

        last_page += page_step
        time.sleep(page_sleep)
def get_temperature(self, location):
    print
    print location
    print

    current_url = CURRENT_URL.format(location)
    with closing(urllib.urlopen(current_url)) as file_handle:
        current_data = parse_xml(file_handle)

    return {'temperature': current_data.find('temp_c').text,
            'location': location}
def parseHtmlAsXml(body):
    def forceXml(body):
        newBody = body
        for entity, replacement in _entities.items():
            newBody = newBody.replace(entity, replacement)
        newBody = _scriptTagRegex.sub('', newBody)
        return newBody

    try:
        return parse_xml(StringIO(forceXml(body)))
    except XMLSyntaxError:
        print body
        raise
def main(args=None):
    if args is None:
        args = sys.argv[1:]
    try:
        filename = args[0]
    except IndexError:
        filename = "-"
    xml = parse_xml(filename if filename != "-" else sys.stdin)
    collapse(xml)
    xml.write(sys.stdout.buffer, pretty_print=True)
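# Invocation sketch for main() above: it reads XML from a filename argument
# (or stdin when the argument is "-" or missing), rewrites the tree with the
# module's collapse() helper, and pretty-prints the result. pretty_print is
# an lxml feature, so parse_xml is presumably lxml.etree.parse. The script
# name below is hypothetical.
#
#   $ python collapse_xml.py input.xml
#   $ cat input.xml | python collapse_xml.py -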
def createReturnValue(header, body, parse):
    if parse and body.strip() != '':
        try:
            body = parse_xml(StringIO(body))
        except:
            # Fall back to lenient HTML parsing if strict XML parsing fails.
            try:
                body = HTML(body, HTMLParser(recover=True))
            except:
                print "Exception parsing:"
                print body
                raise
    return header, body
def _subtitle_file_as_mpl2(self, xmlfile, language):
    # See http://lists.mplayerhq.hu/pipermail/mplayer-users/2003-February/030222.html
    tempfile = NamedTempfile(self.video_id + '-subtitle-' + language + '.mpl2')
    if not tempfile.isempty():
        return tempfile.name
    with tempfile:
        xmltree = parse_xml(xmlfile).getroot()
        for element in xmltree:
            start = int(float(element.attrib['start']) * 1000)
            end = start + int(float(element.attrib['dur']) * 1000)
            text = element.text.replace('\n', '|')
            tempfile.file.write('[{0}][{1}]{2}\n'.format(start, end, text))
    return tempfile.name
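# For reference, each subtitle line written above has the shape
#   [start][end]first line|second line
# where start/end come from the element's 'start' and 'dur' attributes
# (seconds scaled by 1000) and '|' marks line breaks in the cue text.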
async def read_rrd(remote, fn):
    """Dump a RRD file to XML and parse it."""
    if remote:
        # This is a hack
        fn = "/var/lib/rrdcached/db/collectd/" + str(fn)[7:]
        # ssh is broken WRT quoting
        p = await trio.run_process(
            ["ssh", "-n", remote, "rrdtool", "dump", shlex.quote(fn)],
            capture_stdout=True)
    else:
        p = await trio.run_process(["rrdtool", "dump", str(fn)],
                                   capture_stdout=True)
    # Moving the parser to a background task doesn't speed things up.
    return parse_xml(p.stdout)
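# Usage sketch for read_rrd() above, under two assumptions: parse_xml
# accepts the raw bytes of `rrdtool dump` output (e.g. lxml.etree.fromstring),
# and the .rrd path below is illustrative.
import trio

async def demo():
    root = await read_rrd(None, "/tmp/demo.rrd")
    print(root.tag)  # rrdtool dump output has an <rrd> root element

trio.run(demo)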
def __init__(self, path):
    self.functions = []
    self.classes = []
    self.structs = []
    self.enums = []

    # Begin temporaries
    self.nodes = {}
    self.namespaces = []
    # A dict of {context: {function: [signatures]}}
    self.signatures = defaultdict(partial(defaultdict, list))
    # End temporaries

    parsers = {
        'Struct': self.parse_class,  # Not all Struct tags are POD
        'Class': self.parse_class,
        'Function': self.parse_function,
        'Method': self.parse_method,
        'Constructor': self.parse_method,
        'Destructor': self.parse_method,
        'Field': self.parse_field,
        'Enumeration': self.parse_enum,
        'ArrayType': self.parse_array,
        'PointerType': self.parse_pointer,
        'ReferenceType': self.parse_reference,
        'CvQualifiedType': self.parse_qualified_type,
        'FundamentalType': self.parse_builtin,
        'Typedef': self.parse_typedef,
        'File': self.parse_file,
        'Namespace': self.parse_namespace,
    }

    doc = parse_xml(path)
    for xml_node in doc.getroot().iterchildren():
        parser = parsers.get(xml_node.tag)
        if parser:
            parser(xml_node)

    self.process_locations()
    self.process_contexts()
    self.process_namespaces()
    self.process_signatures()
    self.process_classes()
    self.process_types()

    del self.nodes
    del self.namespaces
def __init__(self, path):
    self.pools = {}
    self.repositories = {}
    self.options = {}
    self.path = path

    self.tree = parse_xml(path)
    root = self.tree.getroot()

    for elm in root.xpath('//configuration/pool'):
        self.pools[elm.get('id')] = {'path': elm.get('path')}

    for elm in root.xpath('//configuration/repository'):
        self.repositories[elm.get('id')] = {
            'remote': elm.get('remote'),
            'pool': elm.get('pool'),
        }

    for elm in root.xpath('//configuration/option'):
        self.options[elm.get('key')] = elm.get('value')
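# A minimal configuration document the parser above accepts; the tag and
# attribute names follow the XPath expressions in __init__, the values are
# made up.
SAMPLE_CONFIG = b"""\
<configuration>
  <pool id="main" path="/var/pool/main"/>
  <repository id="origin" remote="https://example.com/repo.git" pool="main"/>
  <option key="verbose" value="true"/>
</configuration>
"""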
def createReturnValue(header, body, parse):
    if parse and body.strip() != '':
        body = parse_xml(StringIO(body))
    return header, body
@classmethod
def load(cls, file_path):
    return cls.from_xml(parse_xml(file_path))
def parse_response(self, response):
    """Parse the raw XML content."""
    # Requesting the text content of the response doesn't remove the BOM, so
    # we request the binary content and decode it ourselves to remove it.
    f = io.StringIO(response.content.decode('utf-8-sig'))
    return parse_xml(f)
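# Why 'utf-8-sig' matters above: a UTF-8 byte-order mark survives a plain
# 'utf-8' decode as '\ufeff', which makes XML parsers reject the document.
raw = b'\xef\xbb\xbf<root/>'
print(raw.decode('utf-8').startswith('\ufeff'))     # True: BOM kept
print(raw.decode('utf-8-sig').startswith('<root'))  # True: BOM stripped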
def parse(page, filename):
    estimator = Estimator()
    document = parse_xml(filename)  # type: ElementTree
    estimator.estimate(page, document)
def from_xml(self, content):
    if lxml is None:
        raise ImproperlyConfigured("Usage of the XML aspects requires lxml.")
    return self.from_etree(parse_xml(StringIO(content)).getroot())