예제 #1
0
def anonym_dom(dom, selectors, mode):
  '''
  Return a copy of the DOM text with tags and text content anonymized.

  The input is first passed through del_blockcomment. The result is then
  walked tag by tag (as matched by re_tag):
    - text between the current position and the next tag is copied as is;
    - closing tags are copied as is;
    - opening tags are rewritten by anonym_tag(tag, selectors, mode);
    - the content that follows a tag (up to the index returned by
      skip_content) is rewritten by anonym_str(content, mode), unless it is
      empty according to re_empty or the tag starts with '<script', in
      which case it is copied as is.

  selectors and mode are only forwarded to the helpers above.
  '''
  dom = del_blockcomment(dom)
  pieces = []
  pos = 0
  match = re_tag.search(dom, pos)
  while match:
    tag = match.group(0)
    # Whatever sits between the previous stop and this tag is kept verbatim
    # (the slice is simply empty when the tag starts right at pos).
    pieces.append(dom[pos:match.start()])
    # Closing tags are harmless; only opening tags are handed to anonym_tag.
    is_closing = tag[1] == '/'
    pieces.append(tag if is_closing else anonym_tag(tag, selectors, mode))
    # Non-tag part that follows the tag.
    pos = match.end()
    content_end = skip_content(dom, pos)
    content = dom[pos:content_end]
    garble = (content_end > pos
              and not re_empty.match(content)
              and tag[1:7] != 'script')
    pieces.append(anonym_str(content, mode) if garble else content)
    pos = content_end
    match = re_tag.search(dom, pos)
  return ''.join(pieces)
예제 #2
0
def prettify(dom):
  '''
  Add indentation to a DOM file so that it is easier to read.

  The input is first passed through del_blockcomment, then walked tag by
  tag (as matched by re_tag). Each tag is re-emitted through
  indent(tag, depth) and the non-empty content that follows it through
  indent(content, depth + 1).

  Depth handling:
    - an opening tag goes one level deeper and is indented at that level;
    - a closing tag is indented at the level of its opening tag, and only
      then is the depth reduced;
    - tags whose label is in EMPTY_ELEMENTS do not change the depth and
      are indented as children of the current element (depth + 1).

  Returns the re-indented text as a single string.
  '''
  # delete commenting
  dom = del_blockcomment(dom)
  pieces = []
  start = 0
  depth = -1  # the first opening tag brings this to 0
  # Search with an explicit start position instead of re-slicing dom on
  # every iteration; the match then gives the tag offsets directly.
  m_tag = re_tag.search(dom, start)
  while m_tag:  # more tags
    tag = m_tag.group(0)
    label = re_tag_label.match(tag).group('label')
    # keep whatever precedes the tag (probably comments) untouched
    pieces.append(dom[start:m_tag.start()])
    # indent tag
    if label in EMPTY_ELEMENTS:
      # no effect on depth, but it is a child of the current element
      pieces.append(indent(tag, depth + 1))
    elif tag[1] == '/':
      # closing tag: align with its opening tag *before* leaving the level,
      # otherwise it ends up one level too shallow (and the last closing
      # tag would be indented at depth -1)
      pieces.append(indent(tag, depth))
      depth -= 1
    else:
      # opening tag: enter the new level first, then indent at it
      depth += 1
      pieces.append(indent(tag, depth))
    # start a new line for content after a tag
    start = m_tag.end()
    current = skip_content(dom, start)
    if current > start and not re_empty.match(dom[start:current]):
      pieces.append(indent(dom[start:current], depth + 1))
    start = current
    m_tag = re_tag.search(dom, start)
  return ''.join(pieces)
예제 #3
0
def selector_index(s):
    '''
    Scan CSS text and index the distinct selectors it uses.

    The text is passed through del_blockcomment, the selectors are
    extracted with re_cssrule, and each selector is scanned with
    re_css_id (ids, #NAME) and re_css_class (classes, .CLASS).

    Returns a dict with two sets: {'id': <ids>, 'class': <classes>}.
    '''
    css = del_blockcomment(s)
    ids, classes = set(), set()
    for rule in re_cssrule.findall(css):
        # sets de-duplicate names repeated across selectors
        ids |= set(re_css_id.findall(rule))
        classes |= set(re_css_class.findall(rule))
    return {'id': ids, 'class': classes}
예제 #4
0
def selector_index(s):
  '''
  Build an index of the distinct selectors found in a piece of CSS.

  After del_blockcomment has been applied to the text, every selector
  matched by re_cssrule is searched for ids (#NAME, via re_css_id) and
  classes (.CLASS, via re_css_class).

  Returns {'id': set_of_ids, 'class': set_of_classes}.
  '''
  rules = re_cssrule.findall(del_blockcomment(s))
  # one set per selector kind; duplicates across rules collapse naturally
  found_ids = {name for rule in rules for name in re_css_id.findall(rule)}
  found_classes = {name for rule in rules
                   for name in re_css_class.findall(rule)}
  return {'id': found_ids, 'class': found_classes}