def upload_card_set(request):
    """Render the upload page and import an uploaded card file on POST.

    For a POST with a valid ``UploadCardSetForm``, the uploaded ``'file'`` is
    parsed with ``Mnemosyne2Cards.read``.  When that yields a non-empty
    result, a ``CardSet`` named after the form's ``'name'`` field is saved,
    followed by one ``Card`` per ``(question, answer)`` pair.  Any exception
    raised while reading the file is swallowed and treated as "no cards".

    In every case ``upload.html`` is rendered with the form (bound on POST,
    unbound otherwise).
    """
    if request.method != 'POST':
        form = UploadCardSetForm()
    else:
        form = UploadCardSetForm(request.POST, request.FILES)
        if form.is_valid():
            # Parse the uploaded file; a failed parse simply yields no cards.
            try:
                parsed_cards = Mnemosyne2Cards.read(request.FILES['file'])
            except Exception:  # FIXME be more specific about what to catch
                parsed_cards = None

            if parsed_cards:
                # The set must exist before cards can point at it.
                new_set = CardSet(name=form.cleaned_data['name'])
                new_set.save()
                for front, back in parsed_cards:
                    Card(card_set=new_set, question=front, answer=back).save()
            # TODO: provide feedback for invalid uploads

    template_vars = {'form': form}
    return render_to_response('upload.html', template_vars,
                              context_instance=RequestContext(request))
def handle(self, *args, **options):
    """Rebuild all ``CardSet`` rows from the Bulbapedia card-set table.

    The page named by ``CARDSET_PAGE`` is fetched through ``BASE_URL`` and
    parsed first; only then are the existing ``CardSet`` objects deleted, so
    a failed download leaves the stored sets untouched.  Every row of the
    nested table after its first row (presumably the header) yields up to
    two records: an English one (``country="GB"``, set number read from the
    row's ``th``) and a Japanese one (``country="JP"``, set number read from
    the first ``td``).  A record is skipped when building it raises
    ``ValueError``, e.g. because the row has no valid set number for that
    language.

    ``args`` and ``options`` are accepted but not used.
    """
    self.stdout.write("start scraping from Bulbapedia...\n")
    # Raw string so the backslash in \d reaches the regex engine untouched.
    count_re = re.compile(r"official (count|total) (?P<count>\d+)")

    def attach_logo(card_set, cells):
        """Download the logo linked from cell 1 and store it on *card_set*."""
        try:
            logo_url = cells[1].a.img['src']
            # One temporary file per logo, closed when done.  A file shared
            # by both records of a row would still hold the first download
            # when the second one is written to it.
            with NamedTemporaryFile() as logo_temp:
                logo_temp.write(urlopen(logo_url).read())
                logo_ext = urlparse(logo_url).path.split('.')[-1]
                logo_filename = "{0}_{1}.{2}".format(
                    card_set.country, card_set.set_no, logo_ext)
                logo_temp.flush()
                card_set.logo.save(logo_filename, File(logo_temp))
        except (TypeError, AttributeError):
            # The cell has no link or image: leave the logo unset.
            pass

    def scrape_set(country, number_cell, cells, name_idx, count_idx,
                   release_idx):
        """Build and save one ``CardSet`` for *country* from a table row."""
        try:
            card_set = CardSet(country=country)
            card_set.set_no = int(number_cell.text.strip())
            card_set.name = cells[name_idx].text.strip()
            attach_logo(card_set, cells)
            # NOTE(review): the Japanese record also takes its link from
            # cell 2 (the cell used for the English name) -- confirm this
            # is intended.
            try:
                card_set.partial_url = cells[2].a['title']
            except TypeError:
                # No anchor in the cell.
                pass
            card_set.release = datetime.strptime(
                cells[release_idx].text.strip(), "%B %d, %Y")
            count_cell = cells[count_idx]
            explain = count_cell.find("span", "explain")
            try:
                # Prefer the count quoted in the "explain" span's title.
                card_set.official_count = int(
                    count_re.search(explain['title']).group("count"))
            except (TypeError, AttributeError):
                # No span, or its title does not match: use the cell text.
                plain_count = count_cell.text.strip()
                if plain_count != "":
                    card_set.official_count = int(plain_count)
            self.stdout.write("scraped: {0}\n".format(card_set))
            card_set.save()
        except ValueError:
            # If there is no valid set_no (or another cell fails to parse)
            pass

    html = json.load(
        urlopen(BASE_URL.format(CARDSET_PAGE)))['parse']['text']['*']
    set_rows = BeautifulSoup(html).find("table").find("table").find_all(
        "tr")[1:]
    # Drop the old rows only once the replacement data is in hand.
    CardSet.objects.all().delete()
    for row in set_rows:
        cells = row.find_all("td")
        scrape_set("GB", row.find("th"), cells,
                   name_idx=2, count_idx=4, release_idx=6)
        scrape_set("JP", cells[0], cells,
                   name_idx=3, count_idx=5, release_idx=7)
    self.stdout.write("total sets {0}\n".format(
        CardSet.objects.all().count()))
def doit(): dbname = ':memory:' dbname = os.path.join(DATA_DIR, 'tmpdb.db') db = sqlite3.connect(dbname) c = db.cursor() # TODO read from spreadsheet into temp database "dbname" settings.DATABASES['default']['AUTOCOMMIT'] = False print dir(django.db.transaction) print dir(django.db.transaction.connections) django.db.transaction.commit_manually() #django.db.transaction.set_autocommit(False) """this is a terrible way to delete stuff.... Game.objects.all().delete() BlackCard.objects.all().delete() WhiteCard.objects.all().delete() CardSet.objects.all().delete() # So instead use raw SQL """ dmc = connection.cursor() # Django Model Database Cursor for tablename in ['card_set_white_card', 'cards_game', 'cards_player', 'black_cards', 'card_set', 'card_set_black_card', 'white_cards']: dmc.execute('DELETE FROM %s' % tablename) dmc.close() cardset_dict = {} for card_ver in 'v1.0', 'v1.2', 'v1.3', 'v1.4': cardset = CardSet(name=card_ver, description=card_ver) cardset.save() cardset_dict[card_ver] = cardset #c.execute(""" select b."Text" as text, b."Special" as special, b."v1" as v10, b."v1.2" as v12, b."v1.3" as v13, b."v1.4" as v14 from "Main Deck Black" b order by text LIMIT 3""") c.execute(""" select b."Text" as text, b."Special" as special, b."v1" as v10, b."v1.2" as v12, b."v1.3" as v13, b."v1.4" as v14 from "Main Deck Black" b order by text """) print c.description for row_id, row in enumerate(c.fetchall(), 1): draw = 0 print row_id, row card_text = row[0] special = row[1] v10 = row[2] v12 = row[3] v13 = row[4] v14 = row[5] if v10: # sync with other naming conventions v10 = 'v1.0' print (card_text, special, v10, v12, v13, v14) card_text = card_text.replace('______', DEFAULT_BLANK_MARKER) if '_' in card_text: raise NotImplementedError('found an underscore, this may not be a real problem') pick = card_text.count(DEFAULT_BLANK_MARKER) if pick < 1: pick = 1 if special: print row if special == 'PICK 2': pick = 2 elif special == 'DRAW 2, PICK 3': draw = 2 pick = 3 else: raise 
NotImplementedError('unrecognized special') watermark = v10 or v12 or v13 or v14 # pick the first version it showed up in (or we could leave blank) black_card = BlackCard(text=card_text, draw=draw, pick=pick, watermark=watermark) print black_card black_card.save() tmp_dict = {'v1.0':v10, 'v1.2':v12, 'v1.3':v13, 'v1.4':v14} for card_ver in tmp_dict: #print card_ver, tmp_dict[card_ver] if tmp_dict[card_ver]: cardset = cardset_dict[card_ver] cardset.black_card.add(black_card) #c.execute(""" select w."Text" as text, w."v1.0" as v10, w."v1.2" as v12, w."v1.3" as v13, w."v1.4" as v14 from "Main Deck White" w order by text LIMIT 5""") c.execute(""" select w."Text" as text, w."v1.0" as v10, w."v1.2" as v12, w."v1.3" as v13, w."v1.4" as v14 from "Main Deck White" w order by text""") print c.description for row_id, row in enumerate(c.fetchall(), 1): print row_id, row card_text = row[0] v10 = row[1] v12 = row[2] v13 = row[3] v14 = row[4] watermark = v10 or v12 or v13 or v14 # pick the first version it showed up in (or we could leave blank) white_card = WhiteCard(text=card_text, watermark=watermark) print white_card white_card.save() tmp_dict = {'v1.0':v10, 'v1.2':v12, 'v1.3':v13, 'v1.4':v14} for card_ver in tmp_dict: #print card_ver, tmp_dict[card_ver] if tmp_dict[card_ver]: cardset = cardset_dict[card_ver] cardset.white_card.add(white_card) #c.execute(""" select b."col2" as text, b.col3 as special, b.col4 as expansion_name from "Expansions Black" b where text NOT NULL and text != '' and text != 'Text' and expansion_name NOT NULL and expansion_name != '' order by text LIMIT 15""") c.execute(""" select b."col2" as text, b.col3 as special, b.col4 as expansion_name from "Expansions Black" b where text NOT NULL and text != '' and text != 'Text' and expansion_name NOT NULL and expansion_name != '' order by text""") print c.description for row_id, row in enumerate(c.fetchall(), 1): draw = 0 print row_id, row card_text = row[0] special = row[1] expansion_name = row[2] card_text = 
replace_blank(card_text) pick = card_text.count(DEFAULT_BLANK_MARKER) if pick < 1: pick = 1 if special: print row if special == 'PICK 2': pick = 2 elif special == 'DRAW 2, PICK 3': draw = 2 pick = 3 else: raise NotImplementedError('unrecognized special') cardset = cardset_dict.get(expansion_name) # This shouldn't be needed, should have created earlier with black card expansion if cardset is None: cardset = CardSet(name=expansion_name, description=expansion_name, base_deck=False) cardset.save() cardset_dict[expansion_name] = cardset watermark = expansion_name # could leave it blank black_card = BlackCard(text=card_text, draw=draw, pick=pick, watermark=watermark) print black_card black_card.save() cardset.black_card.add(black_card) # TODO handle "[italic]", etc. #c.execute(""" select w."col2" as text, w.col3 as expansion_name from "Expansions White" w where text NOT NULL and text != '' and expansion_name NOT NULL and expansion_name != '' order by expansion_name, text LIMIT 15""") c.execute(""" select w.col2 as text, w.col3 as expansion_name from "Expansions White" w where text NOT NULL and text != '' and text != 'Text' and expansion_name NOT NULL and expansion_name != '' order by expansion_name, text""") print c.description for row_id, row in enumerate(c.fetchall(), 1): print row_id, row card_text = row[0] expansion_name = row[1] cardset = cardset_dict.get(expansion_name) # This shouldn't be needed, should have created earlier with black card expansion if cardset is None: cardset = CardSet(name=expansion_name, description=expansion_name, base_deck=False) cardset.save() cardset_dict[expansion_name] = cardset watermark = expansion_name # could leave it blank white_card = WhiteCard(text=card_text, watermark=watermark) print white_card white_card.save() cardset.white_card.add(white_card) # this is probably not needed due to manual commit later for tmp_cardset_name in cardset_dict: cardset = cardset_dict[tmp_cardset_name] cardset.save() django.db.transaction.commit() 
c.close() db.commit() db.close()
def handle(self, *args, **options):
    """Rebuild all ``CardSet`` rows from the Bulbapedia card-set table.

    The page named by ``CARDSET_PAGE`` is fetched through ``BASE_URL`` and
    parsed first; only then are the existing ``CardSet`` objects deleted, so
    a failed download leaves the stored sets untouched.  Every row of the
    nested table after its first row (presumably the header) yields up to
    two records: an English one (``country="GB"``, set number read from the
    row's ``th``) and a Japanese one (``country="JP"``, set number read from
    the first ``td``).  A record is skipped when building it raises
    ``ValueError``, e.g. because the row has no valid set number for that
    language.

    ``args`` and ``options`` are accepted but not used.
    """
    self.stdout.write("start scraping from Bulbapedia...\n")
    # Raw string so the backslash in \d reaches the regex engine untouched.
    count_re = re.compile(r"official (count|total) (?P<count>\d+)")

    def attach_logo(card_set, cells):
        """Download the logo linked from cell 1 and store it on *card_set*."""
        try:
            logo_url = cells[1].a.img['src']
            # One temporary file per logo, closed when done.  A file shared
            # by both records of a row would still hold the first download
            # when the second one is written to it.
            with NamedTemporaryFile() as logo_temp:
                logo_temp.write(urlopen(logo_url).read())
                logo_ext = urlparse(logo_url).path.split('.')[-1]
                logo_filename = "{0}_{1}.{2}".format(
                    card_set.country, card_set.set_no, logo_ext)
                logo_temp.flush()
                card_set.logo.save(logo_filename, File(logo_temp))
        except (TypeError, AttributeError):
            # The cell has no link or image: leave the logo unset.
            pass

    def scrape_set(country, number_cell, cells, name_idx, count_idx,
                   release_idx):
        """Build and save one ``CardSet`` for *country* from a table row."""
        try:
            card_set = CardSet(country=country)
            card_set.set_no = int(number_cell.text.strip())
            card_set.name = cells[name_idx].text.strip()
            attach_logo(card_set, cells)
            # NOTE(review): the Japanese record also takes its link from
            # cell 2 (the cell used for the English name) -- confirm this
            # is intended.
            try:
                card_set.partial_url = cells[2].a['title']
            except TypeError:
                # No anchor in the cell.
                pass
            card_set.release = datetime.strptime(
                cells[release_idx].text.strip(), "%B %d, %Y")
            count_cell = cells[count_idx]
            explain = count_cell.find("span", "explain")
            try:
                # Prefer the count quoted in the "explain" span's title.
                card_set.official_count = int(
                    count_re.search(explain['title']).group("count"))
            except (TypeError, AttributeError):
                # No span, or its title does not match: use the cell text.
                plain_count = count_cell.text.strip()
                if plain_count != "":
                    card_set.official_count = int(plain_count)
            self.stdout.write("scraped: {0}\n".format(card_set))
            card_set.save()
        except ValueError:
            # If there is no valid set_no (or another cell fails to parse)
            pass

    html = json.load(
        urlopen(BASE_URL.format(CARDSET_PAGE)))['parse']['text']['*']
    set_rows = BeautifulSoup(html).find("table").find("table").find_all(
        "tr")[1:]
    # Drop the old rows only once the replacement data is in hand.
    CardSet.objects.all().delete()
    for row in set_rows:
        cells = row.find_all("td")
        scrape_set("GB", row.find("th"), cells,
                   name_idx=2, count_idx=4, release_idx=6)
        scrape_set("JP", cells[0], cells,
                   name_idx=3, count_idx=5, release_idx=7)
    self.stdout.write("total sets {0}\n".format(
        CardSet.objects.all().count()))