def test_basic(): test_support.requires('network') from eventlet.green import urllib if test_support.verbose: print "test_basic ..." socket.RAND_status() socket.RAND_add("this is a random string", 75.0) try: f = urllib.urlopen('https://sf.net') except IOError, exc: if exc.errno == errno.ETIMEDOUT: raise test_support.ResourceDenied('HTTPS connection is timing out') else: raise
def test_basic(): test_support.requires('network') from eventlet.green import urllib if test_support.verbose: print "test_basic ..." socket.RAND_status() socket.RAND_add("this is a random string", 75.0) try: f = urllib.urlopen('https://sf.net') except IOError, exc: if exc.errno == errno.ETIMEDOUT: raise test_support.ResourceDenied('HTTPS connection is timing out') else: raise
def fetch_html(url): print "accessing url {0}".format(url) return urllib.urlopen(url).read()
from bs4 import BeautifulSoup import json import eventlet from eventlet.green import urllib from scraper.models import publicSkills manager = publicSkills.objects coursera_courses_json = json.loads(urllib.urlopen("https://www.coursera.org/maestro/api/topic/list?full=1").read()) coursera_class_base_url = "https://www.coursera.org/course/" exclusion_list = ['Microsoft Office', 'Microsoft Word', 'PowerPoint', 'Microsoft Excel', 'Photoshop', 'Word', 'Research', 'Teamwork', 'Customer Service', 'Time Management', 'Excel', 'Office', 'MIPS', 'Facebook', 'English', 'Editing', 'Outlook', 'Google Docs', 'Windows'] #skills we don't think are of interest to our users def get_url_list(xml): soup = BeautifulSoup(xml) person_list = soup.find_all("public-profile-url") url_list = [] for person in person_list: if person.text != "": url_list.append(person.text) return url_list def get_personal_skills(xml): soup = BeautifulSoup(xml) skill_list = soup.find_all("name") names = [skill.text for skill in skill_list] return names def fetch_html(url): print "accessing url {0}".format(url) return urllib.urlopen(url).read() def scrub_html(text):
def getpage(self):
    """Download the eventlet-pyqt project page and display it as HTML."""
    conn = urllib.urlopen("http://code.google.com/p/eventlet-pyqt/")
    try:
        page = conn.read().decode("utf-8")
    finally:
        # Fix: close the connection instead of leaking it.
        conn.close()
    self.setHtml(page)
def getpage(self):
    """Fetch the eventlet-pyqt project page and render it in this widget."""
    conn = urllib.urlopen(
        "http://code.google.com/p/eventlet-pyqt/")
    try:
        page = conn.read().decode("utf-8")
    finally:
        # Fix: previously the open connection was leaked.
        conn.close()
    self.setHtml(page)
def getpage(self):
    """Fetch www.163.com, show it as plain text, then stop the
    'print_number' background operation."""
    conn = urlopen("http://www.163.com/")
    try:
        # Site is GBK-encoded; replace undecodable bytes rather than raise.
        page = conn.read().decode("gbk", "replace")
    finally:
        # Fix: close the connection instead of leaking it.
        conn.close()
    self.setPlainText(page)
    self.operations.kill("print_number")