Example #1
0
        def __init__( self, query, page=0, links=25, proxy=None, verbose=False ):
                """
                        Initialize Module Object -- Takes Up To 5 Arguments

                        query   - Query to search for
                        page    - Page to start parsing results from (100 per page);
                                  converted with int() and stored multiplied by 100
                        links   - Number of links to strip
                        proxy   - Proxy server (None specified by default)
                        verbose - Set verbosity

                        Every setting is stored in the self.config dictionary.

                        When PY_VER is 2 a cookie jar is stored under 'c_jar',
                        self.getCookie() is called, and, if a proxy was given, an
                        opener routing "http" requests through it is installed via
                        urllib2.install_opener().
                """
                self.config = {
                        'url'       : 'http://www.google.com/search?num=100&hl=el&site=&source=hp&q={}&start={}',
                        'proxy'     : proxy,
                        'query'     : query,
                        'verbose'   : verbose,
                        # The URL template requests 100 results per page (num=100),
                        # so the page index is scaled by 100 here.
                        'page'      : int( page ) * 100,
                        'links'     : links,
                        # Raw string: the backslashes are regex escapes, not Python
                        # string escapes.  The non-raw form relied on invalid escape
                        # sequences, which newer interpreters warn about.  The
                        # compiled pattern text is unchanged.
                        're'        : compile( r'\/url\?q\=(.*)\&sa\=U\&ei\=' ),
                        'Agents'    : [
                                        'Mozilla/5.0 (X11; U; Linux x86_64; fr; rv:1.9.1.9) Gecko/20100317 SUSE/3.5.9-0.1.1 Firefox/3.5.9',
                                        'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 (.NET CLR 3.5.30729)',
                                        'Opera/5.0 (Linux 2.0.38 i386; U) [en]',
                                        'Opera/9.60 (Windows NT 5.1; U; de) Presto/2.1.1',
                                      ],
                }

                if PY_VER == 2:
                        self.config[ 'c_jar' ] = cookiejar()
                        self.getCookie()
                        if proxy is not None:
                                # install_opener() replaces the module-wide default
                                # opener, so later urllib2.urlopen() calls use the proxy.
                                urllib2.install_opener( urllib2.build_opener( urllib2.ProxyHandler( { "http" : proxy } ) ) )
Example #2
0
 def __init__(self):
     """Initialise the instance state and install a cookie-aware opener.

     Sets up the initial URL list, empty ``threads`` and ``blogurl`` lists,
     a ``threading.Condition`` stored as ``lock``, and a cookie jar shared
     with an opener that handles HTTP and HTTPS.  The opener is installed
     as urllib2's default, so plain ``urllib2.urlopen()`` calls made
     afterwards go through it.
     """
     self.urls = ["http://blog.csdn.net/index.html"]
     self.threads = []
     self.blogurl = []
     self.lock = threading.Condition()
     # The class is spelled CookieJar; cookielib has no lowercase
     # ``cookiejar`` attribute, so that spelling raised AttributeError.
     self.cj = cookielib.CookieJar()
     self.opener = urllib2.build_opener(
         urllib2.HTTPHandler(),
         urllib2.HTTPSHandler(),
         urllib2.HTTPCookieProcessor(self.cj),
     )
     urllib2.install_opener(self.opener)
	try:
		config = json.loads(open(CONFIGPATH, 'r').read())
	except ValueError:
		sys.exit('"%s" is not a valid JSON file. Quitting.' % CONFIGPATH.replace(os.getenv('HOME'), '~'))
else:
	sys.exit('Please create a configuration file at "%s". Quitting.' % configpath.replace(os.getenv('HOME'), '~'))

# Ask for the password interactively when the configuration does not provide
# one (key missing or value empty).
if not config.get('password'):
	config['password'] = getpass('Password: ')

# URLs derived from the configured server.  '{ID}' in the 'overview' entry is
# a placeholder that is substituted with a course id before the URL is opened.
urls = {'baseurl' : 'https://%s' % config['server']}
urls.update({'login' : '%s/login/index.php' % urls['baseurl'], 'overview' : '%s/course/view.php?id={ID}' % urls['baseurl']})
blacklist = []

# Create the browser and attach a cookie jar to it before logging in.
browser = mechanize.Browser()
cookies = cookiejar()
browser.set_cookiejar(cookies)
# Override the request headers only when the configuration supplies a
# non-empty user agent.  dict.get() replaces the deprecated has_key().
if config.get('user-agent'):
	browser.addheaders = [('User-agent', config['user-agent'])]

# This could be helpful for debugging...
# browser.set_debug_http(True)
# browser.set_debug_redirects(True)
# browser.set_debug_responses(True)

login(browser, urls['login'], config)
# A single parenthesised argument prints the same text under the Python 2
# print statement and is also valid call syntax.
print('Login successful (%s@%s).' % (config['username'], config['server']))

for course in courses.iterkeys():

	content = browser.open(urls['overview'].replace('{ID}', str(courses[course]['id'])))