def fetch_config(zone, conn):
    """Fetch all pieces of a Route 53 config from Amazon.

    The record sets of the hosted zone are requested page by page. As long
    as a response reports ``IsTruncated`` as ``true``, the ``NextRecord*``
    values from that response are sent as the starting point of the next
    request.

    Args:
      zone: string, hosted zone id.
      conn: boto.route53.Route53Connection

    Returns:
      list of ElementTrees, one for each piece of config.

    Raises:
      ValueError: if a truncated response does not say where the next page
        starts (missing/empty NextRecordName or NextRecordType).
    """
    # Function-scope import so the block works on both Python 2 and 3
    # without touching the file's import section.
    try:
        from urllib.parse import quote
    except ImportError:  # Python 2
        from urllib import quote

    def _encode(value):
        # Percent-encode a query value. Record names and set identifiers may
        # contain characters such as '\\', '&', '=', '+' or spaces that would
        # otherwise corrupt the query string.
        if not isinstance(value, bytes):
            value = value.encode('utf-8')
        return quote(value, safe='')

    base_path = '/%s/hostedzone/%s/rrset' % (R53_API_VERSION, zone)
    cfg_chunks = []
    next_name = next_type = next_identifier = None

    while True:
        getstr = base_path
        if next_name is not None:
            getstr += '?name=%s&type=%s' % (_encode(next_name),
                                            _encode(next_type))
            if next_identifier is not None:
                getstr += '&identifier=%s' % _encode(next_identifier)
        log.debug('requesting %s' % getstr)
        resp = conn.make_request('GET', getstr)
        etree = lxml.etree.parse(resp)
        cfg_chunks.append(etree)

        root = etree.getroot()
        if root.findtext('{%s}IsTruncated' % R53_XMLNS) != 'true':
            break

        next_name = root.findtext('{%s}NextRecordName' % R53_XMLNS)
        next_type = root.findtext('{%s}NextRecordType' % R53_XMLNS)
        if not next_name or not next_type:
            # Without a marker the next request would fetch the first page
            # again, looping forever.
            raise ValueError(
                'truncated Route 53 response for zone %s has no '
                'NextRecordName/NextRecordType' % zone)
        # The identifier is optional; normalise "absent" and "empty" to None.
        next_identifier = (
            root.findtext('{%s}NextRecordIdentifier' % R53_XMLNS) or None)

    return cfg_chunks
def _etree_to_dict(self, etree):
    """Convert an XML etree into a list of dicts plus a list of labels.

    Each child of the root element is treated as one flow. Every
    sub-element of a flow other than ``Tag`` becomes a ``tag -> text`` entry
    in that flow's dict. A ``Tag`` sub-element is not stored in the dict;
    instead it appends a label (``TagValue.Normal`` when its text is
    ``"Normal"``, ``TagValue.Attack`` otherwise) and increments
    ``self._num_normal`` or ``self._num_attack`` accordingly.

    This method only takes care of elements, not attributes!

    :param etree: Etree object to process
    :return: Tuple ``(data, labels)``: ``data`` is a list with one dict per
        flow, ``labels`` is the list of labels collected from ``Tag``
        elements in document order.
    """
    data = []
    labels = []
    for flow in etree.getroot():
        flow_data = {}
        for field in flow:
            if field.tag != "Tag":
                flow_data[field.tag] = field.text
            elif field.text == "Normal":
                labels.append(TagValue.Normal)
                self._num_normal += 1
            else:
                labels.append(TagValue.Attack)
                self._num_attack += 1
        data.append(flow_data)
    return data, labels
def __parseto_xtree(self, xhtml_s):
    """Parse (X)HTML source into an lxml element tree.

    :param xhtml_s: Either the source as a ``str``, or a dict with the key
        ``"xhtml_s"`` (the source) and the optional keys ``"base_url"``,
        ``"resolve_base"`` (default ``True``) and ``"clean_xhtml"``
        (default ``False``). The dict is only read, never modified.
    :return: The parsed element tree. When a base URL was given, the links
        of its root element have been made absolute against it.
    :raises ValueError: the dict carries no (or an empty) ``"xhtml_s"``.
    :raises TypeError: ``xhtml_s`` is neither a ``str`` nor a dict.
    :raises Exception: both parsers failed, or making links absolute failed.
    """
    if isinstance(xhtml_s, dict):
        options = xhtml_s
        base_url = options.get("base_url")
        resolve_base = options.get("resolve_base", True)
        clean_xhtml = options.get("clean_xhtml", False)
        xhtml_s = options.get("xhtml_s")
        # Explicit raise instead of assert: asserts vanish under ``python -O``.
        if not xhtml_s:
            raise ValueError(
                "LinkExtractor.__parseto_xtree() Error: Dictionary with <None> xhtml source"
            )
    elif isinstance(xhtml_s, str):
        clean_xhtml = False
        base_url = None
        resolve_base = True
    else:
        raise TypeError(
            "LinkExtractor.__parseto_xtree() Error: string or dictionary instance expected"
        )

    if clean_xhtml:
        # NOTE(review): the original comment here read "meta=False because we
        # need MetaInfo", yet the call passes meta=True - confirm which one
        # is intended.
        xhtml_clr = html_clr(
            scripts=True,
            javascript=True,
            comments=True,
            style=True,
            links=True,
            meta=True,
            page_structure=False,
            processing_instructions=True,
            embedded=True,
            annoying_tags=True,
            remove_unknown_tags=True,
        )
        xhtml_s = xhtml_clr.clean_html(xhtml_s)

    # The HTMLParser(s) should be defined in the thread (or process) when
    # lxml.html.parser is dispatched into it.
    # Recover mode on; network access (DTD download) allowed.
    htmlparser = lxml.html.HTMLParser(recover=True, no_network=False)

    # Now parse the XHTML source
    try:
        etree = lxml.html.parse(StringIO(xhtml_s), parser=htmlparser)
    except Exception as e:
        print("LinkExtractor Error: %s" % e)
        print("LinkExtractor: Now Trying with the SOUP parser")
        try:
            # NOTE(review): if ``soup`` is lxml.html.soupparser, parse()
            # expects a file or file name rather than the source string -
            # confirm against the file's imports.
            etree = soup.parse(xhtml_s)
        except Exception as e:
            raise Exception("LinkExtractor Error: %s" % e)

    if base_url:
        eroot = etree.getroot()
        try:
            eroot.make_links_absolute(base_url, resolve_base_href=resolve_base)
        except Exception as e:
            # The format string needs its %s placeholder; without it the
            # "%" operation itself raises TypeError and hides the real error.
            raise Exception(
                "LinkExtractor.__parseto_xtree() while making links absolute Error: %s" % e
            )

    # Return the etree just created
    return etree
def __init__(self, rfile, etree, sftp, fscli, log):
    """Keep references to the given objects and cache the tree's root.

    ``rfile`` is stored as ``self.file``, ``fscli`` as ``self.cli``; the
    other arguments are stored under their own names. ``self.root`` is the
    result of ``etree.getroot()`` and ``self._touched`` starts out as an
    empty list.
    """
    root = etree.getroot()

    self.etree, self.root = etree, root
    self.file, self.sftp = rfile, sftp
    self.cli, self.log = fscli, log
    self._touched = list()
def __init__(self, rfile, etree, conf_io, fscli, log):
    """Keep references to the given objects and cache the tree's root.

    ``rfile`` is stored as ``self.file``, ``fscli`` as ``self.cli``; the
    other arguments are stored under their own names. ``self.root`` is the
    result of ``etree.getroot()`` and ``self._touched`` starts out as an
    empty list.
    """
    root = etree.getroot()

    self.etree, self.root = etree, root
    self.file, self.conf_io = rfile, conf_io
    self.cli, self.log = fscli, log
    self._touched = list()
def _etree_to_dict(self, etree):
    """Turn each child of the tree's root into a ``tag -> text`` dict.

    Only elements are considered; XML attributes are ignored. When a tag
    occurs more than once under the same child, the last occurrence wins.

    :param etree: Etree object to process
    :return: List with one dict per child of the root element.
    """
    return [
        {field.tag: field.text for field in flow}
        for flow in etree.getroot()
    ]
def get_schedule(channel, tz):
    """Build the schedule for the single channel this source provides.

    Any channel other than '9 Канал Израиль' yields an empty list. For that
    channel the page at ``_URL`` is parsed with ``_parser``, a fixed
    positional path leads to the listing container, and children 4 to 10 of
    that container are read. Each of them supplies a date through its ``id``
    attribute (``tv_programm_YYYYMMDD``); the items of its first child each
    supply a time (text of the item's first element) and a title (the text
    following that element, stripped).

    :param channel: channel name to look up
    :param tz: passed to ``schedule.Schedule`` together with ``_source_tz``
    :return: ``[]`` for an unsupported channel, otherwise ``sched.pop()``
    """
    if channel != '9 Канал Израиль':
        return []

    sched = schedule.Schedule(tz, _source_tz)
    page_root = lxml.etree.parse(_URL, _parser).getroot()

    # Positional path into the page markup; it breaks if the layout changes.
    listing = page_root[2][7][3][0][4][0][0][4]
    for day_block in listing[4:11]:
        day_stamp = datetime.datetime.strptime(
            day_block.get('id'), 'tv_programm_%Y%m%d')
        sched.set_date(day_stamp.date())
        for item in day_block[0]:
            time_span = item[0]
            sched.set_time(time_span.text)
            sched.set_title(time_span.tail.strip())

    return sched.pop()