Esempio n. 1
0
def fetch_config(zone, conn):
  """Download every page of a hosted zone's record-set listing from Route 53.

  The listing is paginated: as long as a response carries an IsTruncated
  element whose text is 'true', its NextRecordName / NextRecordType /
  NextRecordIdentifier values are sent back as the name / type / identifier
  query parameters of the following request.

  Args: zone: string, hosted zone id.
        conn: boto.route53.Route53Connection
  Returns: list of ElementTrees, one per response, in the order fetched."""
  pages = []
  next_name = next_type = next_identifier = None
  while True:
    path = '/%s/hostedzone/%s/rrset' % (R53_API_VERSION, zone)
    if next_name is not None:
      # NOTE(review): the continuation values are interpolated as-is, without
      # URL-escaping -- confirm that conn.make_request copes with that.
      path += '?name=%s&type=%s' % (next_name, next_type)
      if next_identifier is not None:
        path += '&identifier=%s' % next_identifier
    log.debug('requesting %s' % path)
    page = lxml.etree.parse(conn.make_request('GET', path))
    pages.append(page)
    root = page.getroot()

    truncated = root.find('{%s}IsTruncated' % R53_XMLNS)
    if truncated is None or truncated.text != 'true':
      # Last page: nothing more to request.
      return pages

    next_name = root.find('{%s}NextRecordName' % R53_XMLNS).text
    next_type = root.find('{%s}NextRecordType' % R53_XMLNS).text
    # The identifier element may be absent from the response.
    identifier_node = root.find('{%s}NextRecordIdentifier' % R53_XMLNS)
    next_identifier = None if identifier_node is None else identifier_node.text
Esempio n. 2
0
def fetch_config(zone, conn):
  """Collect the complete record-set listing of a Route 53 hosted zone.

  Requests are repeated until a response no longer reports
  IsTruncated == 'true'; each follow-up request passes the previous
  response's NextRecord* values as name / type / identifier parameters.

  Args: zone: string, hosted zone id.
        conn: boto.route53.Route53Connection
  Returns: list of ElementTrees, one for each response received."""

  def child(node, tag):
    # Look up a direct child by its tag name within the Route 53 namespace.
    return node.find('{%s}%s' % (R53_XMLNS, tag))

  chunks = []
  name = None
  rtype = None
  identifier = None
  while True:
    url = '/%s/hostedzone/%s/rrset' % (R53_API_VERSION, zone)
    if name is not None:
      url += '?name=%s&type=%s' % (name, rtype)
      if identifier is not None:
        url += '&identifier=%s' % identifier
    log.debug('requesting %s' % url)
    response = conn.make_request('GET', url)
    tree = lxml.etree.parse(response)
    chunks.append(tree)
    root = tree.getroot()

    truncated = child(root, 'IsTruncated')
    if not (truncated is not None and truncated.text == 'true'):
      break

    # Remember where the next request has to resume.
    name = child(root, 'NextRecordName').text
    rtype = child(root, 'NextRecordType').text
    identifier_el = child(root, 'NextRecordIdentifier')
    # The identifier element is optional in the response.
    identifier = identifier_el.text if identifier_el is not None else None
  return chunks
    def _etree_to_dict(self, etree):
        """Split an XML etree into per-flow field dicts and a list of labels.

        Every child of the root is treated as one flow. Each sub-element of a
        flow becomes a ``tag -> text`` entry of that flow's dict, except
        ``Tag`` elements: those are turned into a label (``TagValue.Normal``
        when the text is ``"Normal"``, ``TagValue.Attack`` otherwise) and
        counted in ``self._num_normal`` / ``self._num_attack``.

        Only elements are considered, not attributes!

        :param etree: Etree object to process
        :return: Tuple ``(data, labels)``: the list of flow dicts and the
            list of labels collected from the ``Tag`` elements.
        """
        records = []
        labels = []
        for flow in etree.getroot():
            fields = {}
            for child in flow:
                if child.tag == "Tag":
                    # The label is kept out of the feature dict.
                    if child.text == "Normal":
                        labels.append(TagValue.Normal)
                        self._num_normal += 1
                    else:
                        labels.append(TagValue.Attack)
                        self._num_attack += 1
                    continue
                fields[child.tag] = child.text
            records.append(fields)
        return records, labels
Esempio n. 4
0
    def __parseto_xtree(self, xhtml_s):
        """Parse an (X)HTML source into an lxml element tree.

        :param xhtml_s: Either the markup itself as a ``str``, or a dict
            holding the markup under ``"xhtml_s"`` plus the optional keys
            ``"base_url"`` (default None), ``"resolve_base"`` (default True)
            and ``"clean_xhtml"`` (default False). The keys are popped, so
            the caller's dict is modified.
        :return: The parsed tree; when a base_url was supplied, the links of
            its root have been made absolute against it.
        :raises AssertionError: if a dict is given without a usable
            ``"xhtml_s"`` entry.
        :raises Exception: if the argument is neither ``str`` nor dict, if
            both the lxml and the soup parser fail, or if making the links
            absolute fails.
        """

        if isinstance(xhtml_s, dict):
            base_url = xhtml_s.pop("base_url", None)
            # Diagnostic output kept from the original; written in function
            # form so the module also compiles on Python 3.
            print(base_url)
            resolve_base = xhtml_s.pop("resolve_base", True)
            clean_xhtml = xhtml_s.pop("clean_xhtml", False)
            xhtml_s = xhtml_s.pop("xhtml_s", None)
            assert xhtml_s,\
                "LinkExtractor.__parseto_xtree() Error: Dictionary with <None> xhtml source"

        elif isinstance(xhtml_s, str):
            clean_xhtml = False
            base_url = None
            # Bound in every branch so no later use can hit an unbound local.
            resolve_base = True

        else:
            raise Exception(
                "LinkExtractor.__parseto_xtree() Error: string or dictionary instance expected"
            )

        if clean_xhtml:
            # NOTE(review): an earlier comment here said "meta=False because
            # we need MetaInfo", yet the cleaner is built with meta=True --
            # confirm which of the two is intended.
            xhtml_clr = html_clr(
                scripts=True, javascript=True, comments=True, style=True,
                links=True, meta=True, page_structure=False, processing_instructions=True,
                embedded=True, annoying_tags=True, remove_unknown_tags=True
            )

            xhtml_s = xhtml_clr.clean_html(xhtml_s)

        # The HTMLParser(s) should be defined in the thread (or process) when lxml.html.parser is dispatched into it
        # recover=True tolerates broken markup; no_network=False leaves
        # network access enabled for the parser.
        htmlparser = lxml.html.HTMLParser(recover=True, no_network=False)

        # Now parse the XHTML source, falling back to the soup parser when
        # lxml cannot handle it.
        try:
            etree = lxml.html.parse(StringIO(xhtml_s), parser=htmlparser)
        except Exception as e:

            print("LinkExtractor Error: %s" % e)
            print("LinkExtractor: Now Trying with the SOUP parser")

            try:
                etree = soup.parse(xhtml_s)
            except Exception as e:
                raise Exception("LinkExtractor Error: %s" % e)

        if base_url:
            eroot = etree.getroot()
            try:
                eroot.make_links_absolute(base_url, resolve_base_href=resolve_base)
            except Exception as e:
                # The format string needs its %s placeholder: without it the
                # % operator raises TypeError and hides the real failure.
                raise Exception(
                    "LinkExtractor.__parseto_xtree() while making links absolute Error: %s" % e
                )

        # Return the etree just created
        return etree
Esempio n. 5
0
 def __init__(self, rfile, etree, sftp, fscli, log):
     """Store the given collaborators and cache the root of ``etree``.

     :param rfile: kept as ``self.file``
     :param etree: parsed tree, kept as ``self.etree``; the result of its
         ``getroot()`` is kept as ``self.root``
     :param sftp: kept as ``self.sftp``
     :param fscli: kept as ``self.cli``
     :param log: kept as ``self.log``
     """
     # The document and a shortcut to its root element.
     self.etree, self.root = etree, etree.getroot()
     # Collaborators handed in by the caller.
     self.file, self.sftp = rfile, sftp
     self.cli, self.log = fscli, log
     # Starts out empty; this constructor adds nothing to it.
     self._touched = list()
Esempio n. 6
0
 def __init__(self, rfile, etree, conf_io, fscli, log):
     """Store the given collaborators and cache the root of ``etree``.

     :param rfile: kept as ``self.file``
     :param etree: parsed tree, kept as ``self.etree``; the result of its
         ``getroot()`` is kept as ``self.root``
     :param conf_io: kept as ``self.conf_io``
     :param fscli: kept as ``self.cli``
     :param log: kept as ``self.log``
     """
     # The document and a shortcut to its root element.
     self.etree, self.root = etree, etree.getroot()
     # Collaborators handed in by the caller.
     self.file, self.conf_io = rfile, conf_io
     self.cli, self.log = fscli, log
     # Starts out empty; this constructor adds nothing to it.
     self._touched = list()
    def _etree_to_dict(self, etree):
        """Flatten an XML etree into one dict per child of its root.

        Each direct child of the root yields a dict mapping the tag of every
        one of its sub-elements to that sub-element's text; when a tag occurs
        more than once, the last occurrence wins. Only elements are looked
        at, not attributes!

        :param etree: Etree object to process
        :return: Data as a list of dict.
        """
        return [
            {child.tag: child.text for child in flow}
            for flow in etree.getroot()
        ]
Esempio n. 8
0
def get_schedule(channel, tz):
    """Build the programme schedule for the one channel this module handles.

    Any channel name other than '9 Канал Израиль' gives an empty list.
    Otherwise the page at ``_URL`` is parsed with ``_parser`` and the
    day blocks found in it are fed into a ``schedule.Schedule`` created from
    ``tz`` and ``_source_tz``; the result of its ``pop()`` is returned.
    """
    if channel != '9 Канал Израиль':
        return []

    result = schedule.Schedule(tz, _source_tz)

    page_root = lxml.etree.parse(_URL, _parser).getroot()
    # Fixed positional path into the page markup -- presumably the container
    # of the per-day listings; verify against the live page layout.
    listing = page_root[2][7][3][0][4][0][0][4]
    for day_block in listing[4:11]:
        # The element id carries the date, e.g. 'tv_programm_YYYYMMDD'.
        day = datetime.datetime.strptime(day_block.get('id'), 'tv_programm_%Y%m%d')
        result.set_date(day.date())
        for entry in day_block[0]:
            time_node = entry[0]
            # The first child's text is passed on as the time, the text that
            # follows that child (its tail) as the title.
            result.set_time(time_node.text)
            result.set_title(time_node.tail.strip())
    return result.pop()