def fetch() -> None:
    all_prices, timestamps = {}, []
    ch_urls = configuration.get_list('cardhoarder_urls')
    if ch_urls:
        for url in ch_urls:
            s = fetch_tools.fetch(url)
            s = ftfy.fix_encoding(s)
            timestamps.append(
                dtutil.parse_to_ts(
                    s.split('\n', 1)[0].replace('UPDATED ', ''),
                    '%Y-%m-%dT%H:%M:%S+00:00', dtutil.CARDHOARDER_TZ))
            all_prices[url] = parser.parse_cardhoarder_prices(s)
    url = configuration.get_str('mtgotraders_url')
    if url:
        s = fetch_tools.fetch(url)
        timestamps.append(dtutil.dt2ts(dtutil.now()))
        all_prices['mtgotraders'] = parser.parse_mtgotraders_prices(s)
    if not timestamps:
        raise TooFewItemsException(
            'Did not get any prices when fetching {urls} ({all_prices})'.format(
                urls=list(itertools.chain(
                    configuration.get_list('cardhoarder_urls'),
                    [configuration.get_str('mtgotraders_url')])),
                all_prices=all_prices))
    count = store(min(timestamps), all_prices)
    cleanup(count)
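# A minimal sketch (not part of the original source) of the header parsing done in
# fetch() above: each Cardhoarder price file starts with a line like
# 'UPDATED <ISO-8601 datetime>', and that line, minus the 'UPDATED ' prefix, is what
# dtutil.parse_to_ts receives. The sample value below is hypothetical.
def _example_parse_cardhoarder_header() -> int:
    import datetime
    header = 'UPDATED 2020-01-01T12:00:00+00:00'  # hypothetical first line of a price file
    dt = datetime.datetime.strptime(header.replace('UPDATED ', ''), '%Y-%m-%dT%H:%M:%S+00:00')
    return int(dt.replace(tzinfo=datetime.timezone.utc).timestamp())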
def get_article_archive() -> List[Tuple[Tag, str]]:
    try:
        html = fetch_tools.fetch('http://magic.wizards.com/en/articles/archive/184956')
    except fetch_tools.FetchException:
        html = fetch_tools.fetch('http://magic.wizards.com/en/articles/archive/')
    soup = BeautifulSoup(html, 'html.parser')
    return [parse_article_item_extended(a) for a in soup.find_all('div', class_='article-item-extended')]
def find_announcements() -> Tuple[str, bool]:
    articles = [a for a in get_article_archive() if str(a[0].string).startswith('Magic Online Announcements')]
    (title, link) = articles[0]
    print('Found: {0} ({1})'.format(title, link))
    bn = 'Build Notes' in fetch_tools.fetch(link)
    new = update_redirect('announcements', title.text, link, has_build_notes=str(bn))
    return (link, new)
def set_values(raw_deck: RawDeckType) -> RawDeckType:
    raw_deck = translation.translate(translation.TAPPEDOUT, raw_deck)
    raw_decklist = fetch_tools.fetch('{base_url}?fmt=txt'.format(base_url=raw_deck['url']))
    raw_deck['cards'] = decklist.parse(raw_decklist)
    raw_deck['source'] = 'Tapped Out'
    raw_deck['identifier'] = raw_deck['url']
    return raw_deck
def gatherling_deck_comments(d: Deck) -> List[str]:
    url = f'http://gatherling.com/deck.php?mode=view&id={d.identifier}'
    s = fetch_tools.fetch(url)
    result = re.search('COMMENTS</td></tr><tr><td>(.*)</td></tr></table></div><div class="clear"></div><center>', s, re.MULTILINE | re.DOTALL)
    if result:
        return result.group(1).replace('<br />', '\n').split('\n')
    return []
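# A minimal sketch (not from the original source) of the extraction gatherling_deck_comments()
# performs above: the regex captures everything between the COMMENTS header cell and the
# closing markup, then <br /> tags become newlines. The HTML fragment is a made-up stand-in
# for a real Gatherling deck page.
def _example_extract_comments() -> List[str]:
    fragment = 'COMMENTS</td></tr><tr><td>Nice deck!<br />Well played.</td></tr></table></div><div class="clear"></div><center>'
    result = re.search('COMMENTS</td></tr><tr><td>(.*)</td></tr></table></div><div class="clear"></div><center>', fragment, re.MULTILINE | re.DOTALL)
    return result.group(1).replace('<br />', '\n').split('\n') if result else []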
def tournament(url: str, name: str) -> int:
    s = fetch_tools.fetch(url, character_encoding='utf-8', retry=True)

    # Tournament details
    soup = BeautifulSoup(s, 'html.parser')
    cell = soup.find('div', {'id': 'EventReport'}).find_all('td')[1]

    name = cell.find('a').string.strip()
    day_s = cell.find('br').next.strip()
    if '-0001' in day_s:
        # Tournament has been incorrectly configured.
        return 0

    dt, competition_series = get_dt_and_series(name, day_s)
    top_n = find_top_n(soup)
    if top_n == competition.Top.NONE: # Tournament is in progress.
        logger.info('Skipping an in-progress tournament.')
        return 0
    db().begin('tournament')
    competition_id = competition.get_or_insert_competition(dt, dt, name, competition_series, url, top_n)
    ranks = rankings(soup)
    medals = medal_winners(s)
    final = finishes(medals, ranks)
    n = add_decks(dt, competition_id, final, s)
    db().commit('tournament')
    return n
def scrape(limit: int = 50) -> None:
    soup = BeautifulSoup(fetch_tools.fetch('https://gatherling.com/eventreport.php?format=Penny+Dreadful&series=&season=&mode=Filter+Events', character_encoding='utf-8'), 'html.parser')
    tournaments = [(gatherling_url(link['href']), link.string) for link in soup.find_all('a') if link['href'].find('eventreport.php?') >= 0]
    n = 0
    for (url, name) in tournaments:
        i = tournament(url, name)
        n = n + i
        if n > limit:
            return
def run() -> None:
    files = rotation.files()
    n = len(files)
    time_until = TIME_UNTIL_ROTATION - datetime.timedelta(weeks=1)
    if n >= rotation.TOTAL_RUNS:
        print(
            'It is the moment of discovery, the triumph of the mind, and the end of this rotation.'
        )
        return

    if n == 0 and TIME_UNTIL_ROTATION > datetime.timedelta(days=7):
        print(
            'The monks of the North Tree rarely saw their kodama until the rotation, when it woke like a slumbering, angry bear.'
        )
        print('ETA: {t}'.format(
            t=dtutil.display_time(int(time_until.total_seconds()))))
        return

    if n == 0:
        rotation.clear_redis(clear_files=True)
    #else:
    #    rotation.clear_redis()

    all_prices = {}
    for url in configuration.get_list('cardhoarder_urls'):
        s = fetch_tools.fetch(url)
        s = ftfy.fix_encoding(s)
        all_prices[url] = parse_cardhoarder_prices(s)
    url = configuration.get_str('mtgotraders_url')
    if url:
        s = fetch_tools.fetch(url)
        all_prices['mtgotraders'] = parse_mtgotraders_prices(s)

    run_number = process(all_prices)
    if run_number == rotation.TOTAL_RUNS:
        make_final_list()

    try:
        url = f'{fetcher.decksite_url()}/api/rotation/clear_cache'
        fetch_tools.fetch(url)
    except Exception as c:  # pylint: disable=broad-except
        print(c)
def scrape_user(username: str) -> Dict[str, Optional[str]]:
    parsed: Dict[str, Optional[str]] = {}
    parsed['username'] = username
    s = fetch_tools.fetch('https://tappedout.net/users/{0}/'.format(username))
    soup = BeautifulSoup(s, 'html.parser')
    mtgo = soup.find('td', string='MTGO Username')
    if mtgo is not None:
        parsed['mtgo_username'] = mtgo.find_next_sibling('td').string
    else:
        parsed['mtgo_username'] = None
    return parsed
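# A minimal sketch (not from the original source) of the profile parsing in scrape_user()
# above: the MTGO name sits in the <td> that follows the 'MTGO Username' label. The HTML
# fragment is a made-up stand-in for a real TappedOut profile page.
def _example_parse_mtgo_username() -> Optional[str]:
    from bs4 import BeautifulSoup
    fragment = '<table><tr><td>MTGO Username</td><td>example_user</td></tr></table>'
    soup = BeautifulSoup(fragment, 'html.parser')
    mtgo = soup.find('td', string='MTGO Username')
    return mtgo.find_next_sibling('td').string if mtgo is not None else None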
def tournament_matches(d: deck.Deck) -> List[bs4.element.Tag]:
    url = 'https://gatherling.com/deck.php?mode=view&id={identifier}'.format(identifier=d.identifier)
    s = fetch_tools.fetch(url, character_encoding='utf-8', retry=True)
    soup = BeautifulSoup(s, 'html.parser')
    anchor = soup.find(string='MATCHUPS')
    if anchor is None:
        logger.warning('Skipping {id} because it has no MATCHUPS.'.format(id=d.id))
        return []
    table = anchor.find_parents('table')[0]
    rows = table.find_all('tr')
    rows.pop(0) # skip header
    rows.pop() # skip empty last row
    return find_matches(d, rows)
def parse_printable(raw_deck: RawDeckType) -> RawDeckType:
    """If we're not authorized for the TappedOut API, this method will collect name and author of a deck.
    It could also grab a date, but I haven't implemented that yet."""
    s = fetch_tools.fetch(raw_deck['url'] + '?fmt=printable')
    soup = BeautifulSoup(s, 'html.parser')
    raw_deck['name'] = soup.find('h2').string.strip('"')
    infobox = soup.find('table', {'id': 'info_box'})
    if not infobox:
        raise InvalidDataException('Unable to find infobox in parse_printable.')
    user = infobox.find('td', string='User')
    if not user:
        raise InvalidDataException('Unable to find user in parse_printable.')
    raw_deck['user'] = user.find_next_sibling('td').string
    return raw_deck
def legal_cards(force: bool = False, season: Optional[str] = None) -> List[str]:
    if season is None:
        url = 'legal_cards.txt'
    else:
        url = '{season}_legal_cards.txt'.format(season=season)
    encoding = 'utf-8' if season != 'EMN' else 'latin-1' # EMN was encoded weirdly.
    cached_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'legal_cards')
    if os.path.exists(os.path.join(cached_path, url)):
        with open(os.path.join(cached_path, url), encoding=encoding) as h:
            return [line.strip() for line in h.readlines()]

    url = 'http://pdmtgo.com/' + url
    legal_txt = fetch_tools.fetch(url, encoding, force=force)
    if season is not None and configuration.get_bool('save_historic_legal_lists'):
        with open(os.path.join(cached_path, f'{season}_legal_cards.txt'), 'w', encoding=encoding) as h:
            h.write(legal_txt)

    return legal_txt.strip().split('\n')
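# Hypothetical usage of legal_cards() above: with no season it returns the current
# legal_cards.txt, one card name per entry; with a season code (e.g. 'EMN', the only
# code named in the snippet) it reads the cached '<season>_legal_cards.txt' when present
# and fetches it from pdmtgo.com otherwise.
def _example_legal_cards_usage() -> None:
    current = legal_cards()
    emn = legal_cards(season='EMN')
    print(len(current), len(emn))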
def scrape(limit: int = 1) -> None:
    page = 1
    while page <= limit:
        time.sleep(0.1)
        url = 'https://www.mtggoldfish.com/deck/custom/penny_dreadful?page={n}#online'.format(
            n=page)
        soup = BeautifulSoup(
            fetch_tools.fetch(url, character_encoding='utf-8'), 'html.parser')
        raw_decks = soup.find_all('div', {'class': 'deck-tile'})
        if len(raw_decks) == 0:
            logger.warning(
                'No decks found in {url} so stopping.'.format(url=url))
            break
        for raw_deck in raw_decks:
            d = Container({'source': 'MTG Goldfish'})
            a = raw_deck.select_one('.title > span.deck-price-online > a')
            d.identifier = re.findall(r'/deck/(\d+)#online', a.get('href'))[0]
            d.url = 'https://www.mtggoldfish.com/deck/{identifier}#online'.format(
                identifier=d.identifier)
            d.name = a.contents[0].strip()
            d.mtggoldfish_username = without_by(
                raw_deck.select_one(
                    'div.deck-tile-author').contents[0].strip())
            try:
                d.created_date = scrape_created_date(d)
            except InvalidDataException as e:
                msg = f'Got {e} trying to find a created_date in {d}, {raw_deck}'
                logger.error(msg)
                raise InvalidDataException(msg) from e
            time.sleep(5)
            d.cards = scrape_decklist(d)
            err = vivify_or_error(d)
            if err:
                logger.warning(err)
                continue
            deck.add_deck(d)
        page += 1
def scrape_one(url: str) -> Container:
    d = Container({'source': 'MTG Goldfish'})
    identifier_match = re.match('.*/deck/([^#]*)(?:#.*)?', url)
    if identifier_match is None:
        raise InvalidDataException(
            'Cannot find identifier in URL. Is it a valid MTG Goldfish decklist URL?'
        )
    d.identifier = identifier_match.group(1)
    d.url = url
    soup = BeautifulSoup(fetch_tools.fetch(d.url, character_encoding='utf-8'),
                         'html.parser')
    d.name = str(soup.select_one('h2.deck-view-title').contents[0]).strip()
    d.mtggoldfish_username = without_by(
        str(soup.select_one('span.deck-view-author').contents[0].strip()))
    d.created_date = parse_created_date(soup)
    try:
        d.cards = scrape_decklist(d)
    except InvalidDataException as e:
        raise InvalidDataException(
            f'Unable to scrape decklist for {d} because of {e}') from e
    error = vivify_or_error(d)
    if error:
        raise InvalidDataException(error)
    return deck.add_deck(d)
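# A minimal sketch (not from the original source) of the identifier extraction in
# scrape_one() above: the regex keeps everything between '/deck/' and an optional
# '#...' fragment. The URL below is a made-up example.
def _example_extract_goldfish_identifier() -> str:
    import re
    url = 'https://www.mtggoldfish.com/deck/1234567#online'  # hypothetical decklist URL
    match = re.match('.*/deck/([^#]*)(?:#.*)?', url)
    return match.group(1) if match else ''  # -> '1234567'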
def scrape_created_date(d: Container) -> int:
    soup = BeautifulSoup(fetch_tools.fetch(d.url, character_encoding='utf-8'),
                         'html.parser')
    return parse_created_date(soup)
def downtimes() -> str:
    return fetch_tools.fetch('https://pennydreadfulmtg.github.io/modo-bugs/downtimes.txt')
def import_from_pdbot(match_id: int) -> None:
    url = f'https://pdbot.pennydreadfulmagic.com/logs/{match_id}'
    lines = fetch_tools.fetch(url).split('\n')
    import_log(lines, match_id)
def scrape_decklist(d: Container) -> decklist.DecklistType:
    url = 'https://www.mtggoldfish.com/deck/download/{identifier}'.format(
        identifier=d.identifier)
    return decklist.parse(fetch_tools.fetch(url))
def scrape(url: str) -> None:
    soup = BeautifulSoup(fetch_tools.fetch(url), 'html.parser')
    for b in soup.find_all('h2'):
        parse_header(b)