Example #1
0
def localize_misc(dom, path, prefix='misc'):
  '''
  Localize a few standalone resources: an xml, an ico and an iframe.

  For each of the three known tags the first match in `dom` is looked up,
  the referenced URL is handed to FBParser.save_resource together with the
  `path`/`prefix` directory, and every occurrence of that URL in `dom` is
  replaced by the relative location `prefix`/<local file name>.  A tag
  that does not occur in `dom` is skipped.  Afterwards every remaining
  `href="http://..."` attribute is redirected to about:blank.

  dom    -- page source (string)
  path   -- directory in which the `prefix` sub-directory is created
  prefix -- name of the sub-directory receiving the resources

  Returns the rewritten page source.
  '''
  misc_path = os.path.join(path, prefix)
  if not os.path.exists(misc_path):
    os.makedirs(misc_path)
  # Raw strings keep `\+` and `\.` as regex escapes instead of (invalid)
  # string escapes.  Order matters: each replacement works on the dom
  # produced by the previous one.
  patterns = (
    # search description (xml)
    r'<link rel="search" type="application/opensearchdescription\+xml" '
    r'href="(?P<url>http://.*?\.xml)" title="Facebook">',
    # shortcut icon (ico)
    r'<link rel="shortcut icon" href="(?P<url>http://.*?\.ico)">',
    # iframe (html)
    r'<iframe src="(?P<url>http://.*?\.html)"',
  )
  for pattern in patterns:
    match = re.search(pattern, dom)
    if match is None:
      # the tag is not present on this page, nothing to localize
      continue
    url = match.group('url')
    local_name = FBParser.url_to_file(url)
    FBParser.save_resource(url, misc_path, local_name)
    dom = dom.replace(url, os.path.join(prefix, local_name))
  # redirect the rest of the hrefs to about:blank (most of them are hyperlinks)
  # (call this after localize_css!)
  dom = re.sub(r'href="http://.+?"', 'href="about:blank"', dom)
  return dom
Example #2
0
def anonym_images(dom, path, filename):
  '''
  Anonymize images and regenerate file names.

  Reads the image list stored next to the page (`<stem>img_list`, one
  file name per line).  If a `<stem>img_mapping` file already exists, the
  recorded `old: new` pairs are only re-applied to `dom`.  Otherwise every
  listed image gets a random `anonym_<number>` name in its own directory,
  `dom` is rewritten, the image is passed to FBParser.save_resource under
  the new name, the mapping is saved, and the new files are handed to
  FBParser.garble_image.garble.

  dom      -- page source (string)
  path     -- directory holding the page and its companion files
  filename -- file name of the page

  Returns the rewritten page source.
  '''
  # NOTE(review): rstrip('html') removes trailing characters out of the set
  # {h, t, m, l}, not the literal suffix.  Left unchanged because the
  # companion files are presumably created with the same expression --
  # confirm against the code that writes them.
  stem = filename.rstrip('html')
  list_path = os.path.join(path, stem + 'img_list')
  mapping_path = os.path.join(path, stem + 'img_mapping')

  img_files = FBParser.get_content(list_path, encoding='ascii')
  img_files = img_files.split('\n')

  if os.path.isfile(mapping_path):
    # already have garbled images, only replace filenames in dom
    img_mapping = FBParser.get_content(mapping_path, encoding='ascii')
    for mapping in img_mapping.split('\n'):
      img_file, sep, new_file = mapping.partition(': ')
      if not sep or not img_file:
        # blank or malformed line (e.g. an empty mapping file)
        continue
      dom = dom.replace(img_file, new_file)
    return dom

  # garbled image not generated yet
  images = {}
  for img_file in img_files:
    # Skip blank lines: replacing '' would insert the new name between
    # every character of dom.  Skip duplicates: a second random name would
    # overwrite the mapping while dom keeps the first one.
    if not img_file or img_file in images:
      continue
    ext = os.path.splitext(img_file)[1]
    prefix = os.path.split(img_file)[0]
    new_name = 'anonym_' + str(random.getrandbits(40)) + ext
    # Without a directory part a leading '/' would make the name absolute
    # and os.path.join(path, ...) would drop `path`.
    new_file = prefix + '/' + new_name if prefix else new_name
    images[img_file] = new_file
    dom = dom.replace(img_file, new_file)
    FBParser.save_resource(os.path.join(path, img_file), path, new_file)

  st_mapping = [key + ': ' + val for key, val in images.items()]
  FBParser.save_content(
    '\n'.join(st_mapping),
    mapping_path,
    encoding='ascii')

  garbled = [os.path.join(path, val) for val in images.values()]
  FBParser.garble_image.garble(garbled)
  return dom
Example #3
0
def anonym_images(dom, path, filename):
  '''
  Anonymize images and regenerate file names.

  Reads the image list stored next to the page (`<stem>img_list`, one
  file name per line).  Every listed image gets a random
  `anonym_<number>` name in its own directory, `dom` is rewritten to the
  new name, the image is passed to FBParser.save_resource under the new
  name and the original file is removed.  Finally the new files are
  handed to FBParser.garble_image.garble.

  dom      -- page source (string)
  path     -- directory holding the page and its companion files
  filename -- file name of the page

  Returns the rewritten page source.
  '''
  # NOTE(review): rstrip('html') removes trailing characters out of the set
  # {h, t, m, l}, not the literal suffix.  Left unchanged because the list
  # file is presumably created with the same expression -- confirm.
  img_files = FBParser.get_content(
    os.path.join(path, filename.rstrip('html') + 'img_list'),
    encoding='ascii')

  images = {}
  for img_file in img_files.split('\n'):
    # Skip blank lines: replacing '' would insert the new name between
    # every character of dom.  Skip duplicates: the original file is
    # already gone after its first occurrence.
    if not img_file or img_file in images:
      continue
    ext = os.path.splitext(img_file)[1]
    prefix = os.path.split(img_file)[0]
    new_name = 'anonym_' + str(random.getrandbits(40)) + ext
    # Without a directory part a leading '/' would make the name absolute
    # and os.path.join(path, ...) would drop `path`.
    new_file = prefix + '/' + new_name if prefix else new_name
    dom = dom.replace(img_file, new_file)
    original = os.path.join(path, img_file)
    FBParser.save_resource(original, path, new_file)
    os.remove(original)
    images[img_file] = os.path.join(path, new_file)

  FBParser.garble_image.garble(list(images.values()))
  return dom