def __init__(self, query, page=0, links=25, proxy=None, verbose=False):
    """
    Initialize Module Object -- Takes Up To 5 Arguments

    query   - Query to search for
    page    - Page to start parsing results from (100 results per page)
    links   - Number of links to strip
    proxy   - Proxy server (None specified by default)
    verbose - Set verbosity

    All settings are stored in ``self.config``. When running under
    Python 2 a cookie jar is created, ``self.getCookie()`` is called and,
    if a proxy was given, a global urllib2 opener routing "http" traffic
    through that proxy is installed.
    """
    self.config = {
        'url': 'http://www.google.com/search?num=100&hl=el&site=&source=hp&q={}&start={}',
        'proxy': proxy,
        'query': query,
        'verbose': verbose,
        # Page index scaled by 100 (the URL requests num=100 results);
        # presumably this value fills the ``start={}`` placeholder.
        'page': int(page) * 100,
        'links': links,
        # Raw string: the pattern is byte-for-byte the same as before, but
        # no longer relies on invalid escape sequences (\/, \?, \=, \&),
        # which newer Python 3 releases warn about.
        're': compile(r'\/url\?q\=(.*)\&sa\=U\&ei\='),
        'Agents': [
            'Mozilla/5.0 (X11; U; Linux x86_64; fr; rv:1.9.1.9) Gecko/20100317 SUSE/3.5.9-0.1.1 Firefox/3.5.9',
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 (.NET CLR 3.5.30729)',
            'Opera/5.0 (Linux 2.0.38 i386; U) [en]',
            'Opera/9.60 (Windows NT 5.1; U; de) Presto/2.1.1',
        ],
    }

    # NOTE(review): the nesting below was inferred -- cookie jar, cookie
    # fetch and urllib2 proxy opener are all treated as Python 2-only
    # because ``urllib2`` is a Python 2 module name. Confirm against the
    # intended layout.
    if PY_VER == 2:
        self.config['c_jar'] = cookiejar()
        self.getCookie()
        if proxy is not None:
            # install_opener() is process-wide: every later
            # urllib2.urlopen() call goes through this proxy.
            urllib2.install_opener(
                urllib2.build_opener(
                    urllib2.ProxyHandler({"http": proxy})
                )
            )
def __init__(self):
    """Set up the initial state and install a cookie-aware urllib2 opener.

    Attributes created:
        urls    -- list holding the single starting URL.
        threads -- empty list (presumably filled with worker threads later).
        blogurl -- empty list (presumably collects discovered blog URLs).
        lock    -- a ``threading.Condition`` instance.
        cj      -- cookie jar shared by every request made via ``opener``.
        opener  -- urllib2 opener handling HTTP, HTTPS and cookies.

    Side effect: the opener is installed globally, so every subsequent
    ``urllib2.urlopen()`` call in the process uses it.
    """
    self.urls = ["http://blog.csdn.net/index.html"]
    self.threads = []
    self.blogurl = []
    self.lock = threading.Condition()

    # The class exported by cookielib is ``CookieJar``; the lowercase
    # ``cookielib.cookiejar`` does not exist and raised AttributeError.
    self.cj = cookielib.CookieJar()
    self.opener = urllib2.build_opener(
        urllib2.HTTPHandler(),
        urllib2.HTTPSHandler(),
        urllib2.HTTPCookieProcessor(self.cj),
    )
    urllib2.install_opener(self.opener)
# Script fragment (Python 2: uses dict.has_key, the print statement and
# dict.iterkeys). In order, the statements below:
#   1. read CONFIGPATH and parse it as JSON, exiting with a message on
#      ValueError;
#   2. prompt for a password via getpass when the config has none;
#   3. build the 'baseurl', 'login' and 'overview' URLs from config['server'];
#   4. create a mechanize Browser with a cookie jar and an optional
#      User-agent header taken from the config;
#   5. log in and open the overview page of every entry in `courses`.
#
# NOTE(review): this fragment is incomplete/corrupted as it stands --
#   * the `else:` that exits with "Please create a configuration file"
#     presumably belongs to a file-existence check that starts before this
#     fragment; attached to the `try` it would run on successful parsing.
#   * that `else:` branch references `configpath` (lowercase) while the rest
#     of the code uses `CONFIGPATH` -- likely a NameError; confirm.
#   * the text between `getpass('Password: '` and `'baseurl'` has been
#     replaced by `******`; presumably it was `)` followed by `urls = {` --
#     TODO restore from the original source.
#   * everything after the first `#` on the line (login call, print, the
#     course loop) is lexically a comment while the code stays on one line.
#   * `courses` is not defined anywhere in the visible fragment.
try: config = json.loads(open(CONFIGPATH, 'r').read()) except ValueError: sys.exit('"%s" is not a valid JSON file. Quitting.' % CONFIGPATH.replace(os.getenv('HOME'), '~')) else: sys.exit('Please create a configuration file at "%s". Quitting.' % configpath.replace(os.getenv('HOME'), '~')) if not config.has_key('password') or not config['password']: config['password'] = getpass('Password: '******'baseurl' : 'https://%s' % config['server']} urls.update({'login' : '%s/login/index.php' % urls['baseurl'], 'overview' : '%s/course/view.php?id={ID}' % urls['baseurl']}) blacklist = [] browser = mechanize.Browser() cookies = cookiejar() browser.set_cookiejar(cookies) if config.has_key('user-agent') and config['user-agent']: browser.addheaders = [('User-agent', config['user-agent'])] # This could be helpful for debugging... # browser.set_debug_http(True) # browser.set_debug_redirects(True) # browser.set_debug_responses(True) login(browser, urls['login'], config) print 'Login successful (%s@%s).' % (config['username'], config['server']) for course in courses.iterkeys(): content = browser.open(urls['overview'].replace('{ID}', str(courses[course]['id'])))