Ejemplo n.º 1
0
    def test_cartoonists_include_exclude_filter(self):
        """
        Check include and exclude lists used together.

        A cartoonist listed in both must never show up: exclude wins.
        """
        mandatory_keys = ['img', 'credits', 'website']
        blacklist = ["xkcd_com"]
        whitelist = ["ruthe_de", "nichtlustig_de", "xkcd_com"]
        expected_names = ["ruthe_de", "nichtlustig_de"]
        seen_names = []

        for _ in range(99):
            cartoon = Cartoons.get_random_cartoon(include=whitelist,
                                                  exclude=blacklist)
            present_keys = list(cartoon.keys())
            missing_keys = [key for key in mandatory_keys
                            if key not in present_keys]
            self.assertTrue(
                not missing_keys,
                msg="Every cartoon should contain this keys + some optional: "
                + str(mandatory_keys))

            name = cartoon["name"]
            self.assertTrue(name not in blacklist,
                            msg="no excluded cartoonists")
            # Remember each cartoonist once, in order of first appearance.
            if name not in seen_names:
                seen_names.append(name)

        # Every non-excluded cartoonist has to have appeared at least once ...
        unseen_names = [name for name in expected_names
                        if name not in seen_names]
        self.assertTrue(not unseen_names,
                        msg="Not all Cartoonists in filter showed a cartoon:" +
                        str(seen_names))
        # ... and nobody else may have appeared.
        self.assertTrue(len(seen_names) == len(expected_names))
Ejemplo n.º 2
0
    def test_cartoonists_include_filter(self):
        """
        Check that an include list restricts results to exactly those
        cartoonists.
        """
        mandatory_keys = ['img', 'credits', 'website']
        whitelist = ["xkcd_com", "smbc_comics_com"]
        seen_names = []

        for _ in range(99):
            cartoon = Cartoons.get_random_cartoon(include=whitelist)
            self.assertIsInstance(cartoon, dict)

            present_keys = list(cartoon.keys())
            missing_keys = [key for key in mandatory_keys
                            if key not in present_keys]
            self.assertTrue(
                not missing_keys,
                msg="Every cartoon should contain this keys + some optional: "
                + str(mandatory_keys))

            name = cartoon["name"]
            self.assertTrue(name in whitelist,
                            msg="only filtered cartoonists")
            # Remember each cartoonist once, in order of first appearance.
            if name not in seen_names:
                seen_names.append(name)

        # Each included cartoonist has to have appeared at least once ...
        unseen_names = [name for name in whitelist if name not in seen_names]
        self.assertTrue(not unseen_names,
                        msg="Not all Cartoonists in filter showed a cartoon:" +
                        str(seen_names))
        # ... and nobody else may have appeared.
        self.assertTrue(len(whitelist) == len(seen_names))
Ejemplo n.º 3
0
    def test_language_filter(self):
        """
        Test language filtering.

        For each of the languages "en" and "de", a cartoon requested with
        that language filter must report that language. Requesting "de"
        while only including "xkcd_com" is expected to raise CartoonError.
        """
        for _ in range(0, 99):
            for language in ("en", "de"):
                c = Cartoons.get_random_cartoon(languages=[language])
                # The message names the language actually being checked, so
                # a German failure is no longer reported as "Language en".
                self.assertEqual(c["language"], language,
                                 msg="Language " + language + ":" + str(c))

            # Language and include filter are combined; this combination is
            # expected to be rejected with CartoonError.
            with self.assertRaises(CartoonError):
                Cartoons.get_random_cartoon(languages=["de"],
                                            include=["xkcd_com"])
Ejemplo n.º 4
0
 def test_random(self):
     """
     An unfiltered request must return a dict carrying the mandatory keys.
     """
     mandatory_keys = ['img', 'credits', 'website']

     for _ in range(99):
         cartoon = Cartoons.get_random_cartoon()
         self.assertIsInstance(cartoon, dict)

         present_keys = list(cartoon.keys())
         missing_keys = [key for key in mandatory_keys
                         if key not in present_keys]
         self.assertTrue(
             not missing_keys,
             msg="Every cartoon should contain this keys + some optional" +
             str(mandatory_keys))
Ejemplo n.º 5
0
    def test_cartoonists_exclude_filter(self):
        """
        Check that cartoonists on the exclude list are never returned.
        """
        mandatory_keys = ['img', 'credits', 'website']
        blacklist = ["xkcd_com", "smbc_comics_com"]

        for _ in range(99):
            cartoon = Cartoons.get_random_cartoon(exclude=blacklist)

            present_keys = list(cartoon.keys())
            missing_keys = [key for key in mandatory_keys
                            if key not in present_keys]
            self.assertTrue(
                not missing_keys,
                msg="Every cartoon should contain this keys + some optional: "
                + str(mandatory_keys))

            name = cartoon["name"]
            self.assertTrue(name not in blacklist,
                            msg=name + " shouldn't be in " +
                            str(blacklist))
Ejemplo n.º 6
0
"""A simple example of cartoonista to show all possible options."""
from pprint import pprint
from cartoonista import Cartoons

# Fetch every known cartoonist together with its metadata.
cartoonists = Cartoons.get_all_cartoonists()
print("All include and there infos:")
print(cartoonists)
print("Nr of include:", len(cartoonists))

# Total number of cartoons, summed over all cartoonists.
nr = sum(entry["cartoon_count"] for entry in cartoonists)
print("Nr of cartoons:", nr)

# No filter at all.
unfiltered = Cartoons.get_random_cartoon()
print("Random without filter", unfiltered)

# Restrict to a list of cartoonists.
two_sources = Cartoons.get_random_cartoon(include=["xkcd_com", "ruthe_de"])
print("Only ruthe.de or xkcd.com", two_sources)

# Restrict by language.
english_only = Cartoons.get_random_cartoon(languages=["en"])
print("Only english", english_only)

# Include list and language filter combined.
english_of_three = Cartoons.get_random_cartoon(
    include=["xkcd_com", "ruthe_de", "nichtlustig_de"],
    languages=["en"],
)
print(
    "Filter given include list by language: xkcd.com, ruthe.de, nichtlustig.de and en",
    english_of_three)

# An include list with a single entry selects one cartoonist.
single_source = Cartoons.get_random_cartoon(include=["schoenescheisse_de"])
print("Single Cartoonist: schoenescheisse.de", single_source)

# Exclude by tag: returns only xkcd, because they aren't offensive
# (to me ;) ).
inoffensive = Cartoons.get_random_cartoon(
    include=["xkcd_com", "explosm_net", "martin-perscheid_de"],
    exclude_tags=["offensive"],
)
print("Filter by tag: exclude offensive", inoffensive)
Ejemplo n.º 7
0
    def do_GET(self):
        """Serve the demo page, its static assets and the JSON endpoints.

        Routing uses the path component of the request URL, so a query
        string (e.g. ``/?reload=1``) no longer turns a known route into a
        404. Routes:

        * ``/`` -- ``cartoon.html`` filled in with a random cartoon
        * ``/cartoon.css``, ``/cartoon.js`` -- static files
        * ``/rest/cartoons/include`` -- JSON of
          ``Cartoons.get_all_cartoonists()``
        * ``/rest/cartoons/cartoon`` -- JSON of
          ``Cartoons.get_random_cartoon()``

        Any other path is answered with ``404 Not found``. Files are opened
        relative to the current working directory and read as UTF-8, which
        matches the charset announced in the response headers.
        """
        parsed_path = parse.urlparse(self.path)
        message_parts = [
            'CLIENT VALUES:',
            'client_address={} ({})'.format(self.client_address,
                                            self.address_string()),
            'command={}'.format(self.command),
            'path={}'.format(self.path),
            'real path={}'.format(parsed_path.path),
            'query={}'.format(parsed_path.query),
            'request_version={}'.format(self.request_version),
            '',
            'SERVER VALUES:',
            'server_version={}'.format(self.server_version),
            'sys_version={}'.format(self.sys_version),
            'protocol_version={}'.format(self.protocol_version),
            '',
            'HEADERS RECEIVED:',
        ]
        print(message_parts)

        def send_ok(content_type, text):
            # Called only once the body is fully built, so an error while
            # reading a file or fetching a cartoon is raised before any
            # "200" status line has been written.
            self.send_response(200)
            self.send_header('Content-Type', content_type)
            self.end_headers()
            self.wfile.write(text.encode('utf-8'))

        route = parsed_path.path
        if route == "/":
            cartoon = Cartoons.get_random_cartoon()
            # Work on a local copy of the text instead of writing the
            # "<br>" suffix back into the dict returned by the library.
            txt = cartoon.get("txt", "")
            if txt:
                txt = txt + "<br>"

            with open("cartoon.html", encoding="utf-8") as f:
                page = Template(f.read())
            html = page.safe_substitute(img=cartoon['img'],
                                        website=cartoon['website'],
                                        credits=cartoon['credits'],
                                        title=cartoon.get("title", ""),
                                        txt=txt)
            send_ok('text/html; charset=utf-8', html)
        elif route == "/cartoon.css":
            with open("cartoon.css", encoding="utf-8") as f:
                css = f.read()
            send_ok('text/css; charset=utf-8', css)
        elif route == "/cartoon.js":
            with open("cartoon.js", encoding="utf-8") as f:
                script = f.read()
            send_ok('text/javascript; charset=utf-8', script)
        elif route == "/rest/cartoons/include":
            send_ok('application/json; charset=utf-8',
                    json.dumps(Cartoons.get_all_cartoonists()))
        elif route == "/rest/cartoons/cartoon":
            send_ok('application/json; charset=utf-8',
                    json.dumps(Cartoons.get_random_cartoon()))
        else:
            self.send_error(404, message="Not found")
Ejemplo n.º 8
0
import logging
from cartoonista import Cartoons

# Log at INFO level; without it the progress is not visible.
logging.basicConfig(level=logging.INFO)

# Start scraping with an empty cartoonist list.
Cartoons.start_scraping(cartoonists=[])