def test_monkeypatch_implicit(self):
    """Check that monkeypatch() called without arguments enables findSelect.

    The test asserts three phases on the same soup object:

    1. before patching, calling ``soup.findSelect`` raises ``TypeError``;
    2. after ``monkeypatch()``, ``soup.findSelect`` returns a truthy result
       and the selector checks pass;
    3. after ``unmonkeypatch()``, calling it raises ``TypeError`` again.
    """
    soup = BeautifulSoup(HTML)
    self.assertRaises(TypeError, soup.findSelect, "*")

    monkeypatch()
    try:
        self.assertTrue(soup.findSelect("*"))
        # Each pair is presumably (selector, expected element ids) --
        # verify against assertSelectMultipleExplicit.
        self.assertSelectMultipleExplicit(
            soup,
            ("link", ["l1"]),
            ("div#main", ["main"]),
            ("div div", ["inner"]),
        )
    finally:
        # Undo the patch even when an assertion above fails, so the patched
        # state cannot leak into tests that run afterwards.
        unmonkeypatch()

    self.assertRaises(TypeError, soup.findSelect, "*")
def test_monkeypatch_implicit(self):
    """Verify the patch/unpatch round trip of the argument-less monkeypatch().

    Calling ``soup.findSelect`` must raise ``TypeError`` before the patch is
    applied and again after it is removed; while the patch is active it must
    return a truthy result and satisfy the selector checks below.
    """
    soup = BeautifulSoup(HTML)
    self.assertRaises(TypeError, soup.findSelect, '*')

    monkeypatch()
    try:
        self.assertTrue(soup.findSelect('*'))
        # Pairs look like (selector, expected element ids) -- confirm with
        # the assertSelectMultipleExplicit helper.
        self.assertSelectMultipleExplicit(
            soup,
            ('link', ['l1']),
            ('div#main', ['main']),
            ('div div', ['inner']),
        )
    finally:
        # Always restore the unpatched state; without this a failing
        # assertion would leave the patch installed for later tests.
        unmonkeypatch()

    self.assertRaises(TypeError, soup.findSelect, '*')
""" Based on Premailer. This is a hack of Premailer that uses BeautifulSoup and SoupSelect instead of lxml. """ # http://www.peterbe.com/plog/premailer.py import re, os import codecs import urlparse, urllib from BeautifulSoup import BeautifulSoup, Comment import soupselect soupselect.monkeypatch() __version__ = '1.9' __all__ = ['PremailerError', 'Premailer', 'transform'] class PremailerError(Exception): pass def _merge_styles(old, new, class_=''): """ if :: old = 'font-size:1px; color: red' and :: new = 'font-size:2px; font-weight: bold' then :: return 'color: red; font-size:2px; font-weight: bold'
#!/usr/bin/python # -*- coding: utf-8 -*- import urllib2 from BeautifulSoup import BeautifulSoup import soupselect; soupselect.monkeypatch() import json import re class Entidade: def __init__(self): self.nome = '' self.area = '' self.responsavel = '' self.endereco = '' self.contato = '' self.fone = '' self.fundacao = '' self.banco = '' self.agencia = '' self.conta = '' self.email = '' self.site = '' self.voluntarios_url = '' def to_json(self): return json.dumps(self.__dict__) class Voluntarios: count = 0 entidades = []
# Module-level setup for the plugin script: imports, the Addon helper,
# the soupselect patch, and the mode / parameter constants.
import sys
import xbmc, xbmcgui, xbmcplugin
import urllib2
import urllib
import CommonFunctions
# Short alias for the CommonFunctions module.
common = CommonFunctions
from t0mm0.common.addon import Addon
from metahandler import metahandlers
# Addon helper created with this plugin's id and the invocation arguments.
addon = Addon('plugin.video.1channel', sys.argv)
from bs4 import BeautifulSoup
# monkeypatch() is called with no arguments immediately after the import.
# NOTE(review): BeautifulSoup comes from bs4 here -- confirm that the
# argument-less call patches that class and not a different BeautifulSoup.
import soupselect; soupselect.monkeypatch()
import re
import feedparser
import json

#sys.path.append("./xbmc_env/bin/python2.5")
#sys.path.append("./xbmc_env/lib/python2.5")
#sys.path.append("./xbmc_env/lib/python2.5/site-packages")

# plugin modes
MODE_FIRST = 10
MODE_SECOND = 20
MODE_SEARCH = 30
MODE_RESULT = 40

# parameter keys
PARAMETER_KEY_MODE = "mode"

# menu item names
import logging import urllib2 from bs4 import BeautifulSoup import simplejson from django.db import IntegrityError from models import User # plug soupselect # https://code.google.com/p/soupselect/ import soupselect; soupselect.monkeypatch(BeautifulSoup) def get_page(page_num): chrome_user_agent = "User-Agent:Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.33 (KHTML, like Gecko) Chrome/27.0.1430.0 Safari/537.33" users = [] request = urllib2.Request('http://vimeo.com/channels/staffpicks/subscribers/page:{0}/sort:datesubscribe'.format(page_num)) request.add_header('User-Agent', chrome_user_agent) opener = urllib2.build_opener() page = opener.open(request).read() soup = BeautifulSoup(page) # soup.prettify() for a in soup.findSelect("ol.js-browse_list li a"): name = a['href'] has_video_in_staff_pick = False url = 'http://vimeo.com{0}'.format(name) request = urllib2.Request(url) request.add_header('User-Agent', chrome_user_agent) opener = urllib2.build_opener() profile_page = opener.open(request).read()
# -*- coding: utf-8 -*- import collections import contextlib import cookielib import json import logging import re import urllib import urllib2 from bs4 import BeautifulSoup import soupselect from datetime import datetime from hoteltracker.utils import list_missing_args soupselect.monkeypatch() class HotelWebsite(object): REQUIRED_ARGS = ('name', 'pages', 'parameters', 'conditions') # TODO: Do we really need kwargs? Why not just define the kwargs? def __init__(self, *args, **kwargs): if not all(field in kwargs for field in self.REQUIRED_ARGS): error = list_missing_args(self.REQUIRED_ARGS, kwargs, message='Missing argument{s}: {args}') raise ValueError(error) for key, value in kwargs.iteritems(): new_key = '_{key}'.format(key=key) setattr(self, new_key, value) if not isinstance(self._pages, collections.Iterable):