Example #1
0
def run():
	baseurl = "http://www.gaybydegree.org.uk/index.php?dir=university&task=university"
	print green("> GayByDegree.co.uk scrapping starts now")

	checklist_str = {
		"Policy that protects LGB students from bullying" : "policy",
		"Compulsory Staff Training on LGB issues" : "training",
		"Society for LGB students" : "society",
		"Info for students on LGB issues" : "info",
		"Stonewall Diversity Champion" : "diversity",
		"Events for LGB students" : "events",
		"Explicit welfare support for LGB students" : "welfare",
		"Consultation with LGB students" : "consulation",
		"Specific career advice for LGB students" : "career"
	}

	soup = common.fetch_page(baseurl, "html5lib") # awkward
	shit = ['university', 'of', 'the']
	for uni in common.universites.values():
		print green(">> %s..." % uni),
		# Sanatize into what the website uses
		uni = ' '.join( filter( lambda x: x.lower() not in shit, uni.split(' ') ) )
		# Now go for it!
		l = soup.find("a", text=re.compile(uni) )
		link = htmlentities.decode(l['href'])
		
		page_soup = common.fetch_page("http://www.gaybydegree.org.uk/%s" % link, "html5lib")
		print green(bold(" got information"))
		
		checklist = {}
		for (txt, db_term) in checklist_str.items():
			p = page_soup.find(text=txt)
			i = p.parent.find("img")
			has = "greentick" in i['src']
			if has:
				print green(">>> Has: %s" % txt)
			else:
				print red(">>> Does not have: %s" % txt)
			checklist[db_term] = has
		print checklist
Example #2
0
File: _chuck.py Project: 0asa/chuck
def facts():
    """Fetch one joke from ``chuck_url`` and return it passed through ``decode``.

    The request carries the query parameter ``escape=javascript``; the joke
    text is read from ``['value']['joke']`` of the JSON response.
    """
    response = rq.get(chuck_url, params=dict(escape='javascript'))
    body = response.json()
    return decode(body['value']['joke'])
 def test_should_decode_basic_entities(self):
     """``htmlentities.decode`` turns the basic named entities into their characters."""
     # The inputs must be the entity forms (&amp;, &quot;, &lt;). Passing the
     # already-decoded characters would not exercise decoding at all.
     self.assertEqual('&', htmlentities.decode('&amp;'))
     self.assertEqual('"', htmlentities.decode('&quot;'))
     self.assertEqual('<', htmlentities.decode('&lt;'))
Example #4
0
File: _chuck.py Project: 0asa/chuck
def facts():
    """Fetch a fact from ``base_url + defaults`` and return it passed through ``decode``.

    Takes the ``'fact'`` field of the first element of the JSON response and
    replaces the ``&#039;`` entity with an apostrophe before decoding.
    """
    entries = rq.get(base_url + defaults).json()
    raw_fact = entries[0]['fact']
    return decode(raw_fact.replace('&#039;', "'"))
 def test_should_decode_utf8_accents(self):
     """Accented-letter entities decode to the matching unicode characters."""
     for accented, entity in ((u'é', '&eacute;'), (u'ê', '&ecirc;')):
         self.assertEqual(accented, htmlentities.decode(entity))
Example #6
0
    # Backslash-escape spaces and the characters ? ( * ) in word1. The escaped
    # form is used below both inside the grep command line and as the pattern
    # passed to re.sub.
    word1 = word1.replace(" ","\ ").replace("?","\?").replace("(","\(").replace("*","\*").replace(")","\)")
    # Ask grep for the names (-l) of files matching word1, case-insensitively
    # (-i) and recursively (-r), among the paths expanded from path_change + "*".
    # NOTE(review): the command is assembled by string concatenation and run
    # with shell=True -- confirm word1 and path_change never come from
    # untrusted input.
    process = subprocess.Popen("grep -lir '"+word1+"' "+path_change+"*",
                               stdout=subprocess.PIPE, shell=True)
    # stderr is not piped, so `error` is always None here.
    (files,  error) = process.communicate()
    # One file name per output line; pop() discards the last element of the
    # split (presumably the empty string left after grep's trailing newline).
    files = files.split('\n')
    files.pop()
    if not auto_complete:
        print('(y/n)\nDo you want change \''+word1+'\' in...\n')
    for file in files:
        # Skip the entries named by script_name and file_with_words.
        if not file in(script_name, file_with_words):
            if not auto_complete:
                # Interactive mode: ask for confirmation per file.
                answer = raw_input(file+'? ')
            else:
                print file + '\n'
                answer = ''
            if answer in ('yes','y') or auto_complete:
                f = codecs.open(file, encoding='iso-8859-15', mode='r')
                data = f.read()
                f.close()
                # Decode HTML entities in the whole file content.
                data = htmlentities.decode(data)
                # Decompose (NFD) and drop every character of category 'Mn'.
                # Note that this applies to the whole file content, not only
                # to the replaced words.
                data = ''.join((c for c in unicodedata.normalize('NFD', data) if unicodedata.category(c) != 'Mn'))
                data = re.sub(word1,word2,data)
                if not flag_complete:
                  # Also replace the all-lowercase and all-uppercase variants.
                  data = re.sub(word1.lower(),word2.lower(),data)
                  data = re.sub(word1.upper(),word2.upper(),data)
                # Write the modified content back using the same encoding.
                f = codecs.open(file, encoding='iso-8859-15', mode='w')
                f.write(data)
                f.close()

print('\nCOMPLETED THE CHANGE OF WORDS')
Example #7
0
 def test_should_decode_basic_entities(self):
     """Check that the amp, quot and lt entities decode to their characters."""
     expectations = (
         ('&', '&amp;'),
         ('"', '&quot;'),
         ('<', '&lt;'),
     )
     for character, entity in expectations:
         self.assertEqual(character, htmlentities.decode(entity))
Example #8
0
 def test_should_decode_utf8_accents(self):
     """The eacute and ecirc entities decode to the accented unicode letters."""
     decoded_acute = htmlentities.decode('&eacute;')
     self.assertEqual(u'é', decoded_acute)
     decoded_circumflex = htmlentities.decode('&ecirc;')
     self.assertEqual(u'ê', decoded_circumflex)
Example #9
0
File: _chuck.py Project: 0asa/chuck
def facts():
    payload = {'escape': 'javascript'}
    fact = rq.get(chuck_url, params=payload).json()['value']['joke']
    return decode(fact)