def test_cartoonists_include_exclude_filter(self):
    """
    Combine an include list with an exclude list.

    A cartoonist that appears in both lists must never be returned
    (exclude wins), while each remaining included cartoonist has to be
    returned at least once during the 99 draws.
    """
    required_keys = ['img', 'credits', 'website']
    exclude_filter = ["xkcd_com"]
    include_filter = ["ruthe_de", "nichtlustig_de", "xkcd_com"]
    asserted_cartoonists = ["ruthe_de", "nichtlustig_de"]
    got_cartoonists = []

    for _ in range(99):
        cartoon = Cartoons.get_random_cartoon(include=include_filter, exclude=exclude_filter)

        # Mandatory keys first; a cartoon may carry optional ones on top.
        available = cartoon.keys()
        has_required = all(key in available for key in required_keys)
        self.assertTrue(
            has_required,
            msg="Every cartoon should contain this keys + some optional: " + str(required_keys))

        name = cartoon["name"]
        self.assertTrue(name not in exclude_filter, msg="no excluded cartoonists")

        # Remember each cartoonist once, in order of first appearance.
        if name not in got_cartoonists:
            got_cartoonists.append(name)

    never_drawn = [name for name in asserted_cartoonists if name not in got_cartoonists]
    self.assertTrue(not never_drawn,
                    msg="Not all Cartoonists in filter showed a cartoon:" + str(got_cartoonists))
    self.assertTrue(len(got_cartoonists) == len(asserted_cartoonists))
def test_cartoonists_include_filter(self):
    """
    Restrict the random pick to an include list.

    Every returned cartoon must come from one of the listed cartoonists,
    and each listed cartoonist has to be returned at least once during
    the 99 draws.
    """
    required_keys = ['img', 'credits', 'website']
    cartoonists_filter = ["xkcd_com", "smbc_comics_com"]
    got_cartoonists = []

    for _ in range(99):
        cartoon = Cartoons.get_random_cartoon(include=cartoonists_filter)
        self.assertIsInstance(cartoon, dict)

        # Mandatory keys first; a cartoon may carry optional ones on top.
        available = cartoon.keys()
        has_required = all(key in available for key in required_keys)
        self.assertTrue(
            has_required,
            msg="Every cartoon should contain this keys + some optional: " + str(required_keys))

        name = cartoon["name"]
        self.assertTrue(name in cartoonists_filter, msg="only filtered cartoonists")

        # Remember each cartoonist once, in order of first appearance.
        if name not in got_cartoonists:
            got_cartoonists.append(name)

    never_drawn = [name for name in cartoonists_filter if name not in got_cartoonists]
    self.assertTrue(not never_drawn,
                    msg="Not all Cartoonists in filter showed a cartoon:" + str(got_cartoonists))
    self.assertTrue(len(cartoonists_filter) == len(got_cartoonists))
def test_language_filter(self):
    """
    Test language filtering.

    A cartoon requested with a language filter must carry exactly that
    language. Combining the German filter with an include list that only
    names "xkcd_com" is expected to raise CartoonError.
    """
    for _ in range(99):
        for language in ("en", "de"):
            c = Cartoons.get_random_cartoon(languages=[language])
            # The message names the language actually under test; the
            # German check used to report "Language en:" on failure.
            self.assertEqual(c["language"], language,
                             msg="Language " + language + ":" + str(c))

    # Presumably xkcd_com offers no German cartoons, so this filter
    # combination leaves nothing to pick from -- confirm against the
    # cartoon data if this ever stops raising.
    with self.assertRaises(CartoonError):
        Cartoons.get_random_cartoon(languages=["de"], include=["xkcd_com"])
def test_random(self):
    """
    We should get a dict with random cartoon values.

    Without any filter every draw must still be a dict that contains the
    mandatory keys 'img', 'credits' and 'website'.
    """
    required_keys = ['img', 'credits', 'website']
    for _ in range(99):
        c = Cartoons.get_random_cartoon()
        self.assertIsInstance(c, dict)
        # ": " separates the text from the key list, matching the message
        # used by the other cartoon tests (it used to be glued together).
        self.assertTrue(
            all(key in c for key in required_keys),
            msg="Every cartoon should contain this keys + some optional: " + str(required_keys))
def test_cartoonists_exclude_filter(self):
    """
    Keep excluded cartoonists out of the random pick.

    None of the 99 draws may come from a cartoonist on the exclude list.
    """
    required_keys = ['img', 'credits', 'website']
    exclude_filter = ["xkcd_com", "smbc_comics_com"]

    for _ in range(99):
        cartoon = Cartoons.get_random_cartoon(exclude=exclude_filter)

        # Mandatory keys first; a cartoon may carry optional ones on top.
        available = cartoon.keys()
        has_required = all(key in available for key in required_keys)
        self.assertTrue(
            has_required,
            msg="Every cartoon should contain this keys + some optional: " + str(required_keys))

        is_allowed = cartoon["name"] not in exclude_filter
        self.assertTrue(is_allowed,
                        msg=cartoon["name"] + " shouldn't be in " + str(exclude_filter))
"""A simple example of cartoonista to show all possible options.""" from pprint import pprint from cartoonista import Cartoons # Get a list of all cartoon include and there infos cartoonists = Cartoons.get_all_cartoonists() print("All include and there infos:") print(cartoonists) print("Nr of include:", len(cartoonists)) nr = 0 for c in cartoonists: nr = nr + c["cartoon_count"] print("Nr of cartoons:", nr) print("Random without filter", Cartoons.get_random_cartoon()) print("Only ruthe.de or xkcd.com", Cartoons.get_random_cartoon(include=["xkcd_com", "ruthe_de"])) print("Only english", Cartoons.get_random_cartoon(languages=["en"])) print( "Filter given include list by language: xkcd.com, ruthe.de, nichtlustig.de and en", Cartoons.get_random_cartoon( include=["xkcd_com", "ruthe_de", "nichtlustig_de"], languages=["en"])) print("Single Cartoonist: schoenescheisse.de", Cartoons.get_random_cartoon(include=["schoenescheisse_de"])) print("Filter by tag: exclude offensive", Cartoons.get_random_cartoon( include=["xkcd_com", "explosm_net", "martin-perscheid_de"], exclude_tags=[ "offensive" ])) # returns only xkcd, cause they aren't offensive (to me ;) )
def do_GET(self):
    """Answer a GET request for the demo page, its assets or the REST API.

    Routes are matched against the path component of the URL only, so a
    query string (e.g. a cache-busting "?v=2") no longer causes a 404:

        /                       cartoon.html filled with a random cartoon
        /cartoon.css            stylesheet, served as text/css
        /cartoon.js             script, served as text/javascript
        /rest/cartoons/include  JSON of Cartoons.get_all_cartoonists()
        /rest/cartoons/cartoon  JSON of Cartoons.get_random_cartoon()

    Every other path is answered with a 404 error. The three files are
    opened relative to the current working directory.
    """
    parsed_path = parse.urlparse(self.path)
    message_parts = [
        'CLIENT VALUES:',
        'client_address={} ({})'.format(self.client_address, self.address_string()),
        'command={}'.format(self.command),
        'path={}'.format(self.path),
        'real path={}'.format(parsed_path.path),
        'query={}'.format(parsed_path.query),
        'request_version={}'.format(self.request_version),
        '',
        'SERVER VALUES:',
        'server_version={}'.format(self.server_version),
        'sys_version={}'.format(self.sys_version),
        'protocol_version={}'.format(self.protocol_version),
        '',
        'HEADERS RECEIVED:',
    ]
    print(message_parts)

    def read_text(filename):
        # The responses declare charset=utf-8, so decode the files as UTF-8
        # instead of relying on the platform's default encoding.
        with open(filename, encoding='utf-8') as f:
            return f.read()

    def reply(content_type, text):
        # The body is complete before the status line goes out, so a failure
        # while building it can no longer leave a half-sent 200 response.
        payload = text.encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    route = parsed_path.path
    if route == "/":
        cartoon = Cartoons.get_random_cartoon()
        # Work on a local copy of the text: the dict handed out by Cartoons
        # is left untouched rather than getting "<br>" appended in place.
        txt = cartoon.get("txt", "")
        if txt:
            txt = txt + "<br>"
        page = Template(read_text("cartoon.html")).safe_substitute(
            img=cartoon['img'],
            website=cartoon['website'],
            credits=cartoon['credits'],
            title=cartoon.get("title", ""),
            txt=txt)
        reply('text/html; charset=utf-8', page)
    elif route == "/cartoon.css":
        reply('text/css; charset=utf-8', read_text("cartoon.css"))
    elif route == "/cartoon.js":
        reply('text/javascript; charset=utf-8', read_text("cartoon.js"))
    elif route == "/rest/cartoons/include":
        reply('application/json; charset=utf-8',
              json.dumps(Cartoons.get_all_cartoonists()))
    elif route == "/rest/cartoons/cartoon":
        reply('application/json; charset=utf-8',
              json.dumps(Cartoons.get_random_cartoon()))
    else:
        self.send_error(404, message="Not found")
"""Start the cartoonista scraper with progress output enabled."""
import logging

from cartoonista import Cartoons

# The scraping progress only becomes visible once INFO-level logging is on.
logging.basicConfig(level=logging.INFO)

# NOTE(review): an empty list presumably means "no restriction to specific
# cartoonists" -- confirm against Cartoons.start_scraping.
Cartoons.start_scraping(cartoonists=[])