def get_article_archive() -> List[Tuple[Tag, str]]:
    try:
        html = internal.fetch('http://magic.wizards.com/en/articles/archive/184956')
    except internal.FetchException:
        html = internal.fetch('http://magic.wizards.com/en/articles/archive/')
    soup = BeautifulSoup(html, 'html.parser')
    return [parse_article_item_extended(a) for a in soup.find_all('div', class_='article-item-extended')]
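# Hedged usage sketch (not part of the original module): per the annotated return
# type, each archive entry pairs an article's Tag with its URL string, so a caller
# can read titles via Tag.string.
def print_article_titles() -> None:
    for tag, url in get_article_archive():
        print(tag.string, url)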
def fetch() -> None:
    all_prices, timestamps = {}, []
    ch_urls = configuration.get_list('cardhoarder_urls')
    if ch_urls:
        for url in ch_urls:
            s = fetcher_internal.fetch(url)
            s = ftfy.fix_encoding(s)
            timestamps.append(dtutil.parse_to_ts(s.split('\n', 1)[0].replace('UPDATED ', ''), '%Y-%m-%dT%H:%M:%S+00:00', dtutil.CARDHOARDER_TZ))
            all_prices[url] = parser.parse_cardhoarder_prices(s)
    url = configuration.get_str('mtgotraders_url')
    if url:
        s = fetcher_internal.fetch(url)
        timestamps.append(dtutil.dt2ts(dtutil.now()))
        all_prices['mtgotraders'] = parser.parse_mtgotraders_prices(s)
    if not timestamps:
        # list() so the message shows the actual URLs rather than a chain object repr.
        raise TooFewItemsException('Did not get any prices when fetching {urls} ({all_prices})'.format(urls=list(itertools.chain(configuration.get_list('cardhoarder_urls'), [configuration.get_str('mtgotraders_url')])), all_prices=all_prices))
    store(min(timestamps), all_prices)
    cleanup()
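# Self-contained sketch of the Cardhoarder header parse above, using only the stdlib
# instead of the project's dtutil helpers; the header value is hypothetical.
def _parse_updated_header_sketch() -> None:
    import datetime
    header = 'UPDATED 2020-01-01T00:00:00+00:00'  # first line of a Cardhoarder price file
    stamp = datetime.datetime.strptime(header.replace('UPDATED ', ''), '%Y-%m-%dT%H:%M:%S+00:00')
    print(stamp.isoformat())  # 2020-01-01T00:00:00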
def run() -> None:
    files = rotation.files()
    n = len(files)
    time_until = min(TIME_UNTIL_FULL_ROTATION, TIME_UNTIL_SUPPLEMENTAL_ROTATION) - datetime.timedelta(weeks=1)
    if n >= TOTAL_RUNS:
        print('It is the moment of discovery, the triumph of the mind, and the end of this rotation.')
        return
    if n == 0 and TIME_UNTIL_FULL_ROTATION > datetime.timedelta(days=7) and TIME_UNTIL_SUPPLEMENTAL_ROTATION > datetime.timedelta(days=7):
        print('The monks of the North Tree rarely saw their kodama until the rotation, when it woke like a slumbering, angry bear.')
        print('ETA: {t}'.format(t=dtutil.display_time(time_until.total_seconds())))
        return
    all_prices = {}
    for url in configuration.get_list('cardhoarder_urls'):
        s = fetcher_internal.fetch(url)
        s = ftfy.fix_encoding(s)
        all_prices[url] = parse_cardhoarder_prices(s)
    url = configuration.get_str('mtgotraders_url')
    if url:
        s = fetcher_internal.fetch(url)
        all_prices['mtgotraders'] = parse_mtgotraders_prices(s)
    run_number = process(all_prices)
    if run_number == TOTAL_RUNS:
        make_final_list()
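# Minimal stdlib sketch of the ETA arithmetic in run(): the countdown is the nearer
# of the two rotations minus one week. The TIME_UNTIL_* values here are hypothetical.
def _eta_sketch() -> None:
    import datetime
    full = datetime.timedelta(days=30)          # stands in for TIME_UNTIL_FULL_ROTATION
    supplemental = datetime.timedelta(days=12)  # stands in for TIME_UNTIL_SUPPLEMENTAL_ROTATION
    time_until = min(full, supplemental) - datetime.timedelta(weeks=1)
    print(time_until.total_seconds())  # 432000.0, i.e. 5 days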
def find_announcements() -> Tuple[str, bool]:
    articles = [a for a in get_article_archive() if str(a[0].string).startswith('Magic Online Announcements')]
    (title, link) = articles[0]
    print('Found: {0} ({1})'.format(title, link))
    bn = 'Build Notes' in internal.fetch(link)
    new = update_redirect('announcements', title.text, link, has_build_notes=str(bn))
    return (link, new)
def set_values(raw_deck: DeckType) -> DeckType:
    raw_deck = translation.translate(translation.TAPPEDOUT, raw_deck)
    raw_decklist = fetcher_internal.fetch('{base_url}?fmt=txt'.format(base_url=raw_deck['url']))
    raw_deck['cards'] = decklist.parse(raw_decklist)
    raw_deck['source'] = 'Tapped Out'
    raw_deck['identifier'] = raw_deck['url']
    return raw_deck
def gatherling_deck_comments(d: Deck) -> List[str]:
    url = f'http://gatherling.one/deck.php?mode=view&id={d.identifier}'
    s = internal.fetch(url)
    result = re.search('COMMENTS</td></tr><tr><td>(.*)</td></tr></table></div><div class="clear"></div><center>', s, re.MULTILINE | re.DOTALL)
    if result:
        return result.group(1).replace('<br />', '\n').split('\n')
    return []
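# Self-contained sketch of the comment-scraping regex above, run against a
# hypothetical fragment of a Gatherling deck page:
def _comments_regex_sketch() -> None:
    import re
    sample = ('COMMENTS</td></tr><tr><td>Nice list!<br />Well played.</td></tr>'
              '</table></div><div class="clear"></div><center>')
    result = re.search('COMMENTS</td></tr><tr><td>(.*)</td></tr></table></div><div class="clear"></div><center>', sample, re.MULTILINE | re.DOTALL)
    if result:
        print(result.group(1).replace('<br />', '\n').split('\n'))  # ['Nice list!', 'Well played.']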
def parse_printable(raw_deck: DeckType) -> DeckType:
    """If we're not authorized for the TappedOut API, this method will collect the name and author of a deck.

    It could also grab a date, but that isn't implemented yet.
    """
    s = fetcher_internal.fetch(raw_deck['url'] + '?fmt=printable')
    soup = BeautifulSoup(s, 'html.parser')
    raw_deck['name'] = soup.find('h2').string.strip('"')
    infobox = soup.find('table', {'id': 'info_box'})
    user = infobox.find('td', string='User')
    raw_deck['user'] = user.find_next_sibling('td').string
    return raw_deck
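# Hypothetical markup sketch showing the structure parse_printable expects: an h2
# title in quotes and an info_box table with a User row (the real page has more rows).
def _printable_parse_sketch() -> None:
    from bs4 import BeautifulSoup
    sample = '<h2>"My Deck"</h2><table id="info_box"><tr><td>User</td><td>somebody</td></tr></table>'
    soup = BeautifulSoup(sample, 'html.parser')
    print(soup.find('h2').string.strip('"'))  # My Deck
    user = soup.find('table', {'id': 'info_box'}).find('td', string='User')
    print(user.find_next_sibling('td').string)  # somebody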
def scrape_user(username: str) -> Dict[str, Optional[str]]:
    parsed: Dict[str, Optional[str]] = {}
    parsed['username'] = username
    s = fetcher_internal.fetch('https://tappedout.net/users/{0}/'.format(username))
    soup = BeautifulSoup(s, 'html.parser')
    mtgo = soup.find('td', string='MTGO Username')
    if mtgo is not None:
        parsed['mtgo_username'] = mtgo.find_next_sibling('td').string
    else:
        parsed['mtgo_username'] = None
    return parsed
def legal_cards(force: bool = False, season: Optional[str] = None) -> List[str]:
    if season is None and os.path.exists('legal_cards.txt'):
        print('HACK: Using local legal_cards override.')
        with open('legal_cards.txt') as h:
            legal = h.readlines()
        return [line.strip() for line in legal]
    if season is None:
        url = 'http://pdmtgo.com/legal_cards.txt'
    else:
        url = 'http://pdmtgo.com/{season}_legal_cards.txt'.format(season=season)
    encoding = 'utf-8' if season != 'EMN' else 'latin-1'  # EMN was encoded weirdly.
    legal_txt = internal.fetch(url, encoding, force=force)
    return legal_txt.strip().split('\n')
def legal_cards(force: bool = False, season: Optional[str] = None) -> List[str]:
    if season is None:
        filename = 'legal_cards.txt'
    else:
        filename = '{season}_legal_cards.txt'.format(season=season)
    encoding = 'utf-8' if season != 'EMN' else 'latin-1'  # EMN was encoded weirdly.
    cached_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'legal_cards')
    if os.path.exists(os.path.join(cached_path, filename)):
        with open(os.path.join(cached_path, filename), encoding=encoding) as h:
            legal = h.readlines()
        return [line.strip() for line in legal]
    url = 'http://pdmtgo.com/' + filename
    legal_txt = internal.fetch(url, encoding, force=force)
    if season is not None and configuration.get_bool('save_historic_legal_lists'):
        with open(os.path.join(cached_path, filename), 'w', encoding=encoding) as h:
            h.write(legal_txt)
    return legal_txt.strip().split('\n')
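# Hedged usage sketch of the cached variant above: the first call for a season fetches
# from pdmtgo.com (and, with save_historic_legal_lists set, writes a local copy next to
# this module); later calls read the local copy. EMN is the one latin-1 season.
def _legal_cards_usage_sketch() -> None:
    cards = legal_cards(season='EMN')  # http://pdmtgo.com/EMN_legal_cards.txt, then cached
    print(len(cards), cards[:3])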
def scrape(url: str) -> None:
    soup = BeautifulSoup(fetcher_internal.fetch(url), 'html.parser')
    for b in soup.find_all('h2'):
        parse_header(b)
def downtimes() -> str:
    return internal.fetch('https://pennydreadfulmtg.github.io/modo-bugs/downtimes.txt')
def import_from_pdbot(match_id: int) -> None:
    url = f'https://pdbot.pennydreadfulmagic.com/logs/{match_id}'
    lines = fetcher_internal.fetch(url).split('\n')
    import_log(lines, match_id)