Example #1
import errno
import socket
from test import test_support

def test_basic():
    test_support.requires('network')

    # eventlet's drop-in replacement for urllib: it yields to other
    # greenlets instead of blocking during network I/O
    from eventlet.green import urllib

    if test_support.verbose:
        print "test_basic ..."

    # exercise the OpenSSL PRNG helpers the Python 2 socket module exposes
    socket.RAND_status()
    socket.RAND_add("this is a random string", 75.0)

    try:
        f = urllib.urlopen('https://sf.net')
    except IOError, exc:
        if exc.errno == errno.ETIMEDOUT:
            raise test_support.ResourceDenied('HTTPS connection is timing out')
        else:
            raise
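
Because eventlet.green.urllib is a cooperative drop-in for the standard Python 2 urllib, two fetches started on separate greenlets overlap instead of running back to back. A minimal sketch (the second URL is an arbitrary placeholder, not from the original test):

import eventlet
from eventlet.green import urllib

def fetch(url):
    return urllib.urlopen(url).read()

# both fetches run concurrently: each greenlet yields to the hub
# while it waits on its socket
first = eventlet.spawn(fetch, 'https://sf.net')
second = eventlet.spawn(fetch, 'https://www.python.org')
print len(first.wait()), len(second.wait())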
Example #2
from eventlet.green import urllib

def fetch_html(url):
    # fetch the raw page body; the green urllib yields during network I/O
    print "accessing url {0}".format(url)
    return urllib.urlopen(url).read()
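
A helper like this is typically driven through an eventlet GreenPool so that many pages download concurrently. A sketch, reusing fetch_html as defined above; the URL list is a hypothetical placeholder:

import eventlet

urls = ['http://example.com/a', 'http://example.com/b']  # hypothetical inputs
pool = eventlet.GreenPool(10)
# imap yields each page body in input order as the concurrent fetches finish
for body in pool.imap(fetch_html, urls):
    print "fetched {0} bytes".format(len(body))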
Example #3
from bs4 import BeautifulSoup
import json
import eventlet
from eventlet.green import urllib
from scraper.models import publicSkills

# default Django model manager used to persist scraped skills
manager = publicSkills.objects
# pull Coursera's full course catalogue as JSON via the green (non-blocking) urllib
coursera_courses_json = json.loads(urllib.urlopen("https://www.coursera.org/maestro/api/topic/list?full=1").read())
coursera_class_base_url = "https://www.coursera.org/course/"
exclusion_list = ['Microsoft Office', 'Microsoft Word', 'PowerPoint', 'Microsoft Excel', 'Photoshop', 'Word', 'Research', 'Teamwork', 'Customer Service', 'Time Management', 'Excel', 'Office', 'MIPS', 'Facebook', 'English', 'Editing', 'Outlook', 'Google Docs', 'Windows']  # skills we don't think are of interest to our users

def get_url_list(xml):
    # collect every non-empty <public-profile-url> value from the profile XML
    soup = BeautifulSoup(xml)
    person_list = soup.find_all("public-profile-url")
    url_list = []
    for person in person_list:
        if person.text != "":
            url_list.append(person.text)
    return url_list

def get_personal_skills(xml):
    # extract the text of every <name> tag, i.e. the listed skill names
    soup = BeautifulSoup(xml)
    skill_list = soup.find_all("name")
    names = [skill.text for skill in skill_list]
    return names

def fetch_html(url):
    print "accessing url {0}".format(url)
    return urllib.urlopen(url).read()

def scrub_html(text):
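
The example cuts off at scrub_html. For context, a plausible way the helpers above compose, as a sketch only: the profile URLs are hypothetical placeholders, which in the real scraper would come from get_url_list:

import eventlet

pool = eventlet.GreenPool(20)

# hypothetical stand-ins for URLs returned by get_url_list()
profile_urls = [
    'http://www.linkedin.com/pub/example-one',
    'http://www.linkedin.com/pub/example-two',
]

def profile_to_skills(url):
    # download the profile XML cooperatively, then pull out the skill names
    return get_personal_skills(fetch_html(url))

# process profiles concurrently, dropping skills on the exclusion list
for skills in pool.imap(profile_to_skills, profile_urls):
    print [s for s in skills if s not in exclusion_list]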
Example #4
def getpage(self):
    # fetch the project page via the green urllib so the Qt event loop
    # keeps running, then render the HTML in this widget
    page = urllib.urlopen("http://code.google.com/p/eventlet-pyqt/").read().decode("utf-8")
    self.setHtml(page)
Example #5
def getpage(self):
    # assumes: from eventlet.green.urllib import urlopen
    # fetch the GBK-encoded page, substituting any undecodable bytes
    page = urlopen("http://www.163.com/").read().decode("gbk", "replace")
    self.setPlainText(page)
    # stop the background "print_number" operation once the page is loaded
    self.operations.kill("print_number")