Example #1
0
    def handle(self, *args, **options):
        """Scrape the card list of each "GB" card set and rebuild its cards.

        With exactly one positional argument, only the "GB" card sets whose
        name contains that argument (case-insensitive) are processed;
        otherwise every "GB" card set is processed.

        For each selected set the page is fetched through ``API_URL`` /
        ``BASE_URL``, the card-list table is located, the set's existing
        cards are deleted, and one ``Card`` is created per table row whose
        "<number>/<count>" cell matches the set's ``official_count``.
        ``Rarity`` and ``CardType`` rows are created on demand, and their
        logo is downloaded the first time they are created.

        Raises:
            CommandError: if there are no card sets at all, if the name
                filter matches nothing, or if a selected set has no
                ``partial_url``.
        """
        all_sets = CardSet.objects.all()

        if not all_sets:
            raise CommandError(NO_CARDSETS)

        if len(args) == 1:
            sets = CardSet.objects.filter(name__icontains=args[0], country="GB")
            if not sets:
                raise CommandError(NO_SETS_FOUND.format(args[0], "\n".join([str(cs) for cs in all_sets])))
        else:
            sets = CardSet.objects.filter(country="GB")

        self.stdout.write("found card sets {0}\n".format(", ".join([str(cs) for cs in sets])))
        self.stdout.write("Started Scrape command\n")

        # Raw strings: the patterns are unchanged, but "\s", "\d", "\w" and
        # "\(" are no longer (invalid) string escape sequences.
        card_no = re.compile(r"\s*(?P<card_no>\d+)/(?P<count>\d+)\s*")
        card_list_h2 = re.compile(r"((C|c)ard (L|l)ist(s)*)|(Setlist)")
        energy_type = re.compile(r"(?P<energy_type>\w+) Energy \(((TCG)|(Basic))\)")

        for cs in sets:
            self.stdout.write("Processing '{0}'\n".format(cs.name))

            if not cs.partial_url:
                raise CommandError("{0} does not have a valid URL".format(cs))

            response = urlopen(BASE_URL.format(API_URL.format(urlquote(cs.partial_url))))
            try:
                html = json.load(response)["parse"]["text"]["*"]
            finally:
                response.close()

            try:
                # First <h2> that carries a "mw-headline" span and whose text
                # looks like a card-list heading.
                h2 = next(
                    node
                    for node in BeautifulSoup(html).find_all("h2")
                    if node.find("span", "mw-headline") is not None
                    and node.find(text=card_list_h2) is not None
                )
                # Rows of the first following sibling element that has a <b>
                # containing exactly the set's name.
                rows = next(
                    node.find_all("tr")
                    for node in h2.next_siblings
                    if not isinstance(node, NavigableString)
                    and node.find("b") is not None
                    and node.find("b").find(text=cs.name) is not None
                )
            except StopIteration:
                self.stdout.write("'{0}' does not have any valid cards\n".format(cs.name))
                continue

            # The set is rebuilt from scratch on every run.
            cs.card_set.all().delete()

            for tr in rows:
                td = tr.find("td")
                if td is None:
                    continue

                match = card_no.match(td.text)
                if match is None or int(match.group("count")) != cs.official_count:
                    continue

                # Each hop is two ``next_sibling`` steps; presumably the odd
                # siblings are whitespace text nodes between cells -- this
                # mirrors the original traversal exactly.
                node = td.next_sibling.next_sibling
                name_node = node.next_sibling.next_sibling
                type_node = name_node.next_sibling.next_sibling
                rarity_node = type_node.next_sibling.next_sibling

                rarity_link = rarity_node.a
                if rarity_link is None:
                    rarity_name = "None"
                else:
                    rarity_name = rarity_link["title"].strip()

                rarity, created = Rarity.objects.get_or_create(name=rarity_name)

                if created and rarity_link is not None and rarity_link.img is not None:
                    self._save_logo(rarity, rarity_link.img["src"])

                if type_node.a is not None:
                    card_type_name = type_node.a["title"].strip()
                    t_match = energy_type.match(card_type_name)
                    if t_match is not None:
                        card_type_name = t_match.group("energy_type")
                elif type_node.img is not None and type_node.img["alt"] == "Dragon-attack.png":
                    card_type_name = "Dragon"
                else:
                    try:
                        card_type_name = CARD_TYPE_MAP[type_node.text.strip()]
                    except KeyError:
                        self.stderr.write("Unrecognised type {0}".format(str(type_node)))
                        # Skip the card: falling through would either raise
                        # UnboundLocalError or silently reuse the previous
                        # card's type name.
                        continue

                card_type, created = CardType.objects.get_or_create(name=card_type_name)

                if created and type_node.a is not None and type_node.a.img is not None:
                    self._save_logo(card_type, type_node.a.img["src"])

                card = Card(
                    card_no=match.group("card_no"),
                    card_set=cs,
                    name=name_node.text.encode("utf-8").strip(),
                    card_type=card_type,
                    rarity=rarity,
                )

                if name_node.a is not None:
                    # Drop the first character of the href before joining it
                    # onto BASE_URL.
                    card.url = BASE_URL.format(name_node.a["href"][1:])

                card.save()
                self.stdout.write(
                    "{0}/{1} - {2} ({3})\n".format(
                        str(card.card_no), str(cs.official_count), card.name, cs.name
                    )
                )

            self.stdout.write("total cards {0}\n".format(str(cs.card_set.all().count())))

    def _save_logo(self, instance, url):
        """Download ``url`` and save it as ``instance.logo`` named "<id>.<ext>".

        The extension is whatever follows the last "." in the URL path. The
        HTTP response and the temporary file are both closed once the logo
        has been saved.
        """
        logo_ext = urlparse(url).path.split(".")[-1]
        logo_filename = "{0}.{1}".format(str(instance.id), logo_ext)

        response = urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()

        with NamedTemporaryFile() as logo_temp:
            logo_temp.write(data)
            logo_temp.flush()
            instance.logo.save(logo_filename, File(logo_temp))
Example #2
0
    def handle(self, *args, **options):
        """Scrape the card list of each "GB" card set and rebuild its cards.

        With exactly one positional argument, only the "GB" card sets whose
        name contains that argument (case-insensitive) are processed;
        otherwise every "GB" card set is processed.

        For each selected set the page is fetched through ``API_URL`` /
        ``BASE_URL``, the card-list table is located, the set's existing
        cards are deleted, and one ``Card`` is created per table row whose
        "<number>/<count>" cell matches the set's ``official_count``.
        ``Rarity`` and ``CardType`` rows are created on demand, and their
        logo is downloaded the first time they are created.

        Raises:
            CommandError: if there are no card sets at all, if the name
                filter matches nothing, or if a selected set has no
                ``partial_url``.
        """
        all_sets = CardSet.objects.all()

        if not all_sets:
            raise CommandError(NO_CARDSETS)

        if len(args) == 1:
            sets = CardSet.objects.filter(name__icontains=args[0],
                country="GB")
            if not sets:
                raise CommandError(NO_SETS_FOUND.format(args[0],
                    "\n".join([str(cs) for cs in all_sets])))
        else:
            sets = CardSet.objects.filter(country="GB")

        self.stdout.write("found card sets {0}\n".format(
            ", ".join([str(cs) for cs in sets])))
        self.stdout.write("Started Scrape command\n")

        # Raw strings: the patterns are unchanged, but "\s", "\d", "\w" and
        # "\(" are no longer (invalid) string escape sequences.
        card_no = re.compile(r"\s*(?P<card_no>\d+)/(?P<count>\d+)\s*")
        card_list_h2 = re.compile(r"((C|c)ard (L|l)ist(s)*)|(Setlist)")
        energy_type = re.compile(
            r"(?P<energy_type>\w+) Energy \(((TCG)|(Basic))\)")

        for cs in sets:
            self.stdout.write("Processing '{0}'\n".format(cs.name))

            if not cs.partial_url:
                raise CommandError("{0} does not have a valid URL".format(cs))

            response = urlopen(
                BASE_URL.format(
                    API_URL.format(
                        urlquote(cs.partial_url))))
            try:
                html = json.load(response)['parse']['text']['*']
            finally:
                response.close()

            try:
                # First <h2> that carries a "mw-headline" span and whose text
                # looks like a card-list heading.
                h2 = next(node
                          for node in BeautifulSoup(html).find_all("h2")
                          if node.find("span", "mw-headline") is not None and
                              node.find(text=card_list_h2) is not None)
                # Rows of the first following sibling element that has a <b>
                # containing exactly the set's name.
                rows = next(node.find_all("tr")
                            for node in h2.next_siblings
                            if not isinstance(node, NavigableString) and
                                node.find("b") is not None and
                                node.find("b").find(text=cs.name) is not None)
            except StopIteration:
                self.stdout.write(
                    "'{0}' does not have any valid cards\n".format(cs.name))
                continue

            # The set is rebuilt from scratch on every run.
            cs.card_set.all().delete()

            for tr in rows:
                td = tr.find("td")
                if td is None:
                    continue

                match = card_no.match(td.text)
                if match is None \
                    or int(match.group("count")) != cs.official_count:
                    continue

                # Each hop is two ``next_sibling`` steps; presumably the odd
                # siblings are whitespace text nodes between cells -- this
                # mirrors the original traversal exactly.
                node = td.next_sibling.next_sibling
                name_node = node.next_sibling.next_sibling
                type_node = name_node.next_sibling.next_sibling
                rarity_node = type_node.next_sibling.next_sibling

                rarity_link = rarity_node.a
                if rarity_link is None:
                    rarity_name = "None"
                else:
                    rarity_name = rarity_link['title'].strip()

                rarity, created = Rarity.objects.get_or_create(
                    name=rarity_name)

                if created and rarity_link is not None \
                    and rarity_link.img is not None:
                    self._save_logo(rarity, rarity_link.img['src'])

                if type_node.a is not None:
                    card_type_name = type_node.a['title'].strip()
                    t_match = energy_type.match(card_type_name)
                    if t_match is not None:
                        card_type_name = t_match.group("energy_type")
                elif type_node.img is not None \
                    and type_node.img['alt'] == "Dragon-attack.png":
                    card_type_name = "Dragon"
                else:
                    try:
                        card_type_name = CARD_TYPE_MAP[
                            type_node.text.strip()]
                    except KeyError:
                        self.stderr.write(
                            "Unrecognised type {0}".format(
                            str(type_node)))
                        # Skip the card: falling through would either raise
                        # UnboundLocalError or silently reuse the previous
                        # card's type name.
                        continue

                card_type, created = CardType.objects.get_or_create(
                    name=card_type_name)

                if created and type_node.a is not None \
                    and type_node.a.img is not None:
                    self._save_logo(card_type, type_node.a.img['src'])

                card = Card(card_no=match.group("card_no"),
                    card_set=cs,
                    name=name_node.text.encode('utf-8').strip(),
                    card_type=card_type, rarity=rarity)

                if name_node.a is not None:
                    # Drop the first character of the href before joining it
                    # onto BASE_URL.
                    card.url = BASE_URL.format(name_node.a['href'][1:])

                card.save()
                self.stdout.write("{0}/{1} - {2} ({3})\n".format(
                    str(card.card_no), str(cs.official_count),
                    card.name, cs.name))

            self.stdout.write("total cards {0}\n".format(
                str(cs.card_set.all().count())))

    def _save_logo(self, instance, url):
        """Download ``url`` and save it as ``instance.logo`` named "<id>.<ext>".

        The extension is whatever follows the last "." in the URL path. The
        HTTP response and the temporary file are both closed once the logo
        has been saved.
        """
        logo_ext = urlparse(url).path.split('.')[-1]
        logo_filename = "{0}.{1}".format(str(instance.id), logo_ext)

        response = urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()

        with NamedTemporaryFile() as logo_temp:
            logo_temp.write(data)
            logo_temp.flush()
            instance.logo.save(logo_filename, File(logo_temp))