def scrape() -> None:
    """Log in to TappedOut and import every deck the fetcher returns."""
    login()
    # Use the module logger instead of print so output goes through the
    # normal logging pipeline like the rest of this module.
    logger.warning('Logged in to TappedOut: {is_authorised}'.format(is_authorised=is_authorised()))
    raw_decks = fetch_decks()
    for raw_deck in raw_decks:
        try:
            if is_authorised():
                raw_deck.update(fetch_deck_details(raw_deck))
            raw_deck = set_values(raw_deck)
            deck.add_deck(raw_deck)
        except InvalidDataException as e:
            # One bad deck must not abort the whole scrape — log and move on.
            logger.warning('Skipping {slug} because of {e}'.format(slug=raw_deck.get('slug', '-no slug-'), e=e))
def scrape() -> None:
    """Log in to TappedOut and import all decks, skipping ones with invalid data."""
    login()
    logger.warning('Logged in to TappedOut: {is_authorised}'.format(is_authorised=is_authorised()))
    raw_decks = fetch_decks()
    for raw_deck in raw_decks:
        try:
            if is_authorised():
                details = fetch_deck_details(raw_deck)
                # fetch_deck_details can return None; dict.update(None) would
                # raise TypeError, so guard and log instead of crashing.
                if details is None:
                    logger.warning(f'Failed to get details for {raw_deck}')
                else:
                    raw_deck.update(details)
            raw_deck = set_values(raw_deck)
            deck.add_deck(raw_deck)
        except InvalidDataException as e:
            # Skip just this deck; keep scraping the rest.
            logger.warning('Skipping {slug} because of {e}'.format(slug=raw_deck.get('slug', '-no slug-'), e=e))
def scrape(limit: int = 255) -> None:
    """Walk the MTG Goldfish Penny Dreadful custom-deck pages (up to `limit`)
    and import every deck that parses, vivifies and is PD-legal."""
    page = 1
    while page <= limit:
        time.sleep(0.1)  # be polite to the remote server between page fetches
        url = f'https://www.mtggoldfish.com/deck/custom/penny_dreadful?page={page}#online'
        soup = BeautifulSoup(fetcher.internal.fetch(url, character_encoding='utf-8'), 'html.parser')
        tiles = soup.find_all('div', {'class': 'deck-tile'})
        if not tiles:
            # An empty page means we've walked past the last deck.
            logger.warning(f'No decks found in {url} so stopping.')
            break
        for tile in tiles:
            d = Container({'source': 'MTG Goldfish'})
            anchor = tile.select_one('h2 > span.deck-price-online > a')
            d.identifier = re.findall(r'/deck/(\d+)#online', anchor.get('href'))[0]
            d.url = f'https://www.mtggoldfish.com/deck/{d.identifier}#online'
            d.name = anchor.contents[0].strip()
            d.mtggoldfish_username = tile.select_one('div.deck-tile-author').contents[0].strip()
            # Drop an optional leading 'by ' from the author name.
            author_match = re.match(r'^(by )?(.*)$', d.mtggoldfish_username)
            if author_match:
                d.mtggoldfish_username = author_match.group(2)
            d.created_date = scrape_created_date(d)
            time.sleep(1)
            d.cards = scrape_decklist(d)
            try:
                # MTGG doesn't do any validation of cards so some decks will
                # fail here with card names like 'Stroke of Genuineness'.
                vivified = decklist.vivify(d.cards)
            except InvalidDataException as e:
                logger.warning('Rejecting decklist of deck with identifier {identifier} because of {e}'.format(identifier=d.identifier, e=e))
                continue
            pd_formats = [f for f in legality.legal_formats(vivified) if 'Penny Dreadful' in f]
            if not pd_formats:
                logger.warning('Rejecting deck with identifier {identifier} because it is not legal in any PD formats.'.format(identifier=d.identifier))
                continue
            if len(d.cards) == 0:
                logger.warning('Rejecting deck with identifier {identifier} because it has no cards.'.format(identifier=d.identifier))
                continue
            deck.add_deck(d)
        page += 1
def tournament_deck(cells: ResultSet, competition_id: int, date: datetime.datetime, final: Dict[str, int]) -> Optional[deck.Deck]:
    """Build (or load, if already stored) a deck from one Gatherling results-table
    row; returns None when the fetched decklist has no cards."""
    d: deck.RawDeckDescription = {
        'source': 'Gatherling',
        'competition_id': competition_id,
        'created_date': dtutil.dt2ts(date),
    }
    username = aliased(cells[2].a.contents[0].string)
    d['mtgo_username'] = username
    d['finish'] = final.get(username)
    anchor = cells[4].a
    d['url'] = gatherling_url(anchor['href'])
    d['name'] = anchor.string
    # The archetype cell may or may not wrap its text in a link.
    d['archetype'] = cells[5].a.string if cells[5].find('a') else cells[5].string
    gatherling_id = urllib.parse.parse_qs(urllib.parse.urlparse(str(d['url'])).query)['id'][0]
    d['identifier'] = gatherling_id
    existing = deck.get_deck_id(d['source'], d['identifier'])
    if existing is not None:
        # Already imported — just load it.
        return deck.load_deck(existing)
    dlist = decklist.parse(fetcher.internal.post(gatherling_url('deckdl.php'), {'id': gatherling_id}))
    d['cards'] = dlist
    if len(dlist['maindeck']) + len(dlist['sideboard']) == 0:
        logger.warning('Rejecting deck with id {id} because it has no cards.'.format(id=gatherling_id))
        return None
    return deck.add_deck(d)
def ad_hoc() -> None:
    """One-off TappedOut import: log in, fetch all decks, add each valid one."""
    login()
    logger.warning('Logged in to TappedOut: {is_authorised}'.format(is_authorised=is_authorised()))
    for raw_deck in fetch_decks():
        try:
            if is_authorised():
                details = fetch_deck_details(raw_deck)
                if details is None:
                    # Details lookup failed — import what we have anyway.
                    logger.warning(f'Failed to get details for {raw_deck}')
                else:
                    raw_deck.update(details)  # type: ignore
            deck.add_deck(set_values(raw_deck))
        except InvalidDataException as e:
            # Bad data in one deck shouldn't stop the rest of the run.
            logger.warning('Skipping {slug} because of {e}'.format(slug=raw_deck.get('slug', '-no slug-'), e=e))
def insert_deck(competition_id: int, date: datetime.datetime, d: GatherlingDeck, fs: FinalStandings, players: List[Player]) -> deck.Deck:
    """Insert a deck scraped from Gatherling.

    Raises InvalidDataException when required data is missing or the deck is
    empty, and InvalidArgumentException when the deck already exists.
    """
    finish = fuzzy_get(fs, d.playername)
    if not finish:
        raise InvalidDataException(f"I don't have a finish for `{d.playername}`")
    mtgo_username = find_mtgo_username(d.playername, players)
    if not mtgo_username:
        raise InvalidDataException(f"I don't have an MTGO username for `{d.playername}`")
    raw: deck.RawDeckDescription = {
        'name': d.name,
        'source': 'Gatherling',
        'competition_id': competition_id,
        'created_date': dtutil.dt2ts(date),
        'mtgo_username': mtgo_username,
        'finish': finish,
        'url': gatherling_url(f'/deck.php?mode=view&id={d.id}'),
        'archetype': d.archetype.value,
        'identifier': str(d.id),
        'cards': {
            'maindeck': d.maindeck,
            'sideboard': d.sideboard
        },
    }
    if len(raw['cards']['maindeck']) + len(raw['cards']['sideboard']) == 0:
        raise InvalidDataException(f'Unable to add deck with no cards `{d.id}`')
    decklist.vivify(raw['cards'])  # Validates card names; raises InvalidDataException on bad ones.
    if deck.get_deck_id(raw['source'], raw['identifier']):
        # BUG FIX: this message was a plain string, so {raw['source']} and
        # {raw['identifier']} were never interpolated — it needs the f prefix.
        raise InvalidArgumentException(f"You asked me to insert a deck that already exists `{raw['source']}`, `{raw['identifier']}`")
    return deck.add_deck(raw)
def scrape_one(url: str) -> Container:
    """Scrape a single MTG Goldfish decklist URL and add it as a deck.

    Raises InvalidDataException when the URL has no recognizable identifier,
    the decklist cannot be scraped, or the deck fails vivification.
    """
    d = Container({'source': 'MTG Goldfish'})
    identifier_match = re.match('.*/deck/([^#]*)(?:#.*)?', url)
    if identifier_match is None:
        raise InvalidDataException('Cannot find identifier in URL. Is it a valid MTG Goldfish decklist URL?')
    d.identifier = identifier_match.group(1)
    d.url = url
    soup = BeautifulSoup(fetcher.internal.fetch(d.url, character_encoding='utf-8'), 'html.parser')
    d.name = str(soup.select_one('h2.deck-view-title').contents[0]).strip()
    d.mtggoldfish_username = without_by(str(soup.select_one('span.deck-view-author').contents[0].strip()))
    d.created_date = parse_created_date(soup)
    try:
        d.cards = scrape_decklist(d)
    except InvalidDataException as e:
        # Chain with `from e` so the original traceback/cause is preserved.
        raise InvalidDataException(f'Unable to scrape decklist for {d} because of {e}') from e
    error = vivify_or_error(d)
    if error:
        raise InvalidDataException(error)
    return deck.add_deck(d)
def store_deck(d: deck.Deck) -> deck.Deck:
    """Persist d, reusing any previously stored deck with the same source/identifier."""
    d['source'] = d['source_name']
    d['url'] = d['source_url']
    deck_id = deck.get_deck_id(d['source'], d['identifier'])
    if deck_id is not None:
        # Already stored — return the existing deck instead of inserting again.
        return deck.load_deck(deck_id)
    d['mtgo_username'] = d['person']
    d['cards'] = decklist.unvivify(d)
    return deck.add_deck(d)
def scrape(limit: int = 1) -> None:
    """Scrape up to `limit` pages of MTG Goldfish Penny Dreadful decks and import them."""
    page = 1
    while page <= limit:
        time.sleep(0.1)  # rate-limit page fetches
        url = 'https://www.mtggoldfish.com/deck/custom/penny_dreadful?page={n}#online'.format(n=page)
        soup = BeautifulSoup(fetcher.internal.fetch(url, character_encoding='utf-8'), 'html.parser')
        raw_decks = soup.find_all('div', {'class': 'deck-tile'})
        if len(raw_decks) == 0:
            # Ran off the end of the deck listing.
            logger.warning('No decks found in {url} so stopping.'.format(url=url))
            break
        for raw_deck in raw_decks:
            d = Container({'source': 'MTG Goldfish'})
            a = raw_deck.select_one('h2 > span.deck-price-online > a')
            d.identifier = re.findall(r'/deck/(\d+)#online', a.get('href'))[0]
            d.url = 'https://www.mtggoldfish.com/deck/{identifier}#online'.format(identifier=d.identifier)
            d.name = a.contents[0].strip()
            d.mtggoldfish_username = without_by(raw_deck.select_one('div.deck-tile-author').contents[0].strip())
            try:
                d.created_date = scrape_created_date(d)
            except InvalidDataException as e:
                msg = f'Got {e} trying to find a created_date in {d}, {raw_deck}'
                logger.error(msg)
                # Chain with `from e` so the underlying cause is not lost.
                raise InvalidDataException(msg) from e
            time.sleep(1)
            d.cards = scrape_decklist(d)
            err = vivify_or_error(d)
            if err:
                logger.warning(err)
                continue
            deck.add_deck(d)
        page += 1
def tournament_deck(cells, competition_id, date, ranks):
    """Build a deck dict from one Gatherling results row.

    Returns None for decks already stored or with no cards; raises
    InvalidDataException on an unrecognized player medal image.
    """
    d = {
        'source': 'Gatherling',
        'competition_id': competition_id,
        'created_date': dtutil.dt2ts(date)
    }
    player = cells[2]
    d['mtgo_username'] = player.a.contents[0]
    if player.find('img'):
        # The medal image encodes the player's finish.
        img = re.sub(r'styles/Chandra/images/(.*?)\.png', r'\1', player.img['src'])
        if img == WINNER:
            d['finish'] = 1
        elif img == SECOND:
            d['finish'] = 2
        elif img == TOP_4:
            d['finish'] = 3
        elif img == TOP_8:
            d['finish'] = 5
        elif img == 'verified':
            d['finish'] = ranks.get(d['mtgo_username'], None)
        else:
            raise InvalidDataException('Unknown player image `{img}`'.format(img=img))
    else:
        d['finish'] = ranks.get(d['mtgo_username'], None)
    parts = cells[3].string.split('-')
    d['wins'] = parts[0]
    d['losses'] = parts[1]
    d['draws'] = 0 if len(parts) < 3 else parts[2]
    link = cells[4].a
    d['url'] = gatherling_url(link['href'])
    d['name'] = link.string
    if cells[5].find('a'):
        d['archetype'] = cells[5].a.string
    else:
        d['archetype'] = cells[5].string
    gatherling_id = urllib.parse.parse_qs(urllib.parse.urlparse(d['url']).query)['id'][0]
    d['identifier'] = gatherling_id
    if deck.get_deck_id(d['source'], d['identifier']) is not None:
        return None
    d['cards'] = decklist.parse(fetcher.internal.post(gatherling_url('deckdl.php'), {'id': gatherling_id}))
    # BUG FIX: decklist.parse returns a dict with 'maindeck'/'sideboard' keys
    # (see how other callers check it), so len(d['cards']) == 0 was always
    # false and empty decks were never rejected. Count the sections instead,
    # and use the logger rather than print.
    if len(d['cards']['maindeck']) + len(d['cards']['sideboard']) == 0:
        logger.warning('Rejecting deck with id {id} because it has no cards.'.format(id=gatherling_id))
        return None
    return deck.add_deck(d)
def scrape_url(url: str) -> deck.Deck:
    """Import a single TappedOut deck by URL.

    Raises InvalidDataException when the deck is not legal in Penny Dreadful.
    """
    if not url.endswith('/'):
        url += '/'
    slug = urllib.parse.urlparse(url).path.split('/')[2]
    raw_deck: RawDeckType = {'slug': slug, 'url': url}
    # Authorised sessions get full details; otherwise fall back to the
    # printable page.
    if is_authorised():
        raw_deck.update(fetch_deck_details(raw_deck))  # type: ignore
    else:
        raw_deck.update(parse_printable(raw_deck))  # type: ignore
    raw_deck = set_values(raw_deck)
    vivified = decklist.vivify(raw_deck['cards'])
    errors: Dict[str, Dict[str, Set[str]]] = {}
    if 'Penny Dreadful' not in legality.legal_formats(vivified, None, errors):
        raise InvalidDataException('Deck is not legal in Penny Dreadful - {error}'.format(error=errors.get('Penny Dreadful')))
    return deck.add_deck(raw_deck)
def scrape_url(url):
    """Import a single TappedOut deck by URL (legacy, untyped variant).

    Raises InvalidDataException when the deck is not legal in Penny Dreadful.
    """
    if not url.endswith('/'):
        url += '/'
    slug = urllib.parse.urlparse(url).path.split('/')[2]
    raw_deck = {'slug': slug, 'url': url}
    # Full details need an authorised session; otherwise parse the printable page.
    if is_authorised():
        raw_deck.update(fetch_deck_details(raw_deck))
    else:
        raw_deck.update(parse_printable(raw_deck))
    raw_deck = set_values(raw_deck)
    vivified = decklist.vivify(raw_deck['cards'])
    if 'Penny Dreadful' not in legality.legal_formats(vivified):
        raise InvalidDataException('Deck is not legal in Penny Dreadful')
    return deck.add_deck(raw_deck)
def signup(form):
    """Normalize whitespace on the user-entered fields, then store the deck."""
    for attr in ('mtgo_username', 'name'):
        setattr(form, attr, getattr(form, attr).strip())
    return deck.add_deck(form)
def signup(form: SignUpForm) -> deck.Deck:
    """Strip stray whitespace from the submitted username and deck name, then add the deck."""
    username = form.mtgo_username.strip()
    deck_name = form.name.strip()
    form.mtgo_username = username
    form.name = deck_name
    return deck.add_deck(form)