def get_deck(url):
    """Scrape a tappedout.net deck page and return a deck dict.

    Returns {'commander': ..., 'cards': [...], 'date': ...}, or None when
    the URL cannot be fetched.
    """
    try:
        # I tack on /?fmt=txt because it gives the list of cards in a
        # somewhat nice text format. If only there was an API...
        con = core.urlopen(url.rstrip('/') + '/?fmt=txt')
    except urllib2.HTTPError as e:
        # This will happen on 404 or any other error.
        logging.warning("Someone posted a bad URL: " + url + " (%s)" % str(e))
        return None

    deck = set()
    # For each line in the content of the web page....
    for line in con.splitlines():
        line = line.strip()
        if len(line) == 0:
            continue
        # Card lines are tab delimited like "1\tAustere Command"; anything
        # not starting with a count is not a card.
        if not line[0] in '0123456789':
            continue
        card = line.split('\t')[1]
        try:
            deck.add(core.sanitize_cardname(card))
        except KeyError:
            pass
        except ValueError as e:
            # BUGFIX: the old log line did not say which card failed or why.
            logging.warning("Ignored card %r because of a bad value (%s)" % (card, str(e)))

    # Call out to get_tappedout_info to grab the deck info.
    cmdr, colors, date = get_tappedout_info(url)

    # If they didn't post the commander, try to figure out who it is: the
    # first legendary card whose color identity matches the deck's colors.
    if cmdr is None:
        for card in deck:
            cd = core.lookup_card(card)
            # BUGFIX: lookup_card can return None for unknown cards.
            if cd is None or not cd.has_key('supertypes'):
                continue
            if 'Legendary' in cd['supertypes'] and \
                    sorted(list(core.color_identity(card))) == sorted(list(colors)):
                # A legendary with the colors this deck should be -- stop here.
                cmdr = card
                break
        else:
            logging.warn("there was no legendary creature here.... and none was specified... something f'd up is going on")
            cmdr = 'jedit ojanen'

    # Make sure the commander is part of the card list.
    deck.add(cmdr)
    out_deck = {'commander': cmdr,
                'cards': sorted(list(deck)),
                'date': date}
    return out_deck
def scrapedeck(url_str):
    """Scrape a deckstats.net deck export and return a deck dict.

    Raises ValueError for unfetchable URLs or malformed card lines.
    """
    logging.debug('attempting to scrape the deckstats url: %s ' % url_str)
    url_fetch = url_str + EXPORT_APPEND
    logging.debug("going to go fetch '%s'" % url_fetch)
    try:
        content = urllib2.urlopen(url_fetch).readlines()
    except Exception:
        # BUGFIX: was a bare "except:" which also swallowed SystemExit
        # and KeyboardInterrupt.
        raise ValueError("Invalid URL '%s'" % url_str)

    # First line looks like "//NAME: <deck name> from DeckStats.net".
    text = content[0][len('//NAME: '):-len('from DeckStats.net') - 2]
    logging.debug('The name of this deck is: %s' % text)

    cards = set()
    sideboard = set()
    for line in content:
        # Strip // and # comments before parsing.
        line = line.split('//')[0]
        line = line.split('#')[0]
        line = line.strip()
        if len(line) == 0:
            continue
        if line.startswith('SB:'):
            # Sideboard line: "SB: 1 Card Name". Deckstats keeps the
            # commander in the sideboard.
            sideboard.add(core.sanitize_cardname(line.split(' ', 2)[2]))
            line = line[4:]
        if not line[0] in '0123456789':
            raise ValueError("This isn't a valid line of the form '# Card Name': %s " % line)
        cardname = core.sanitize_cardname(line.split(' ', 1)[1])
        cards.add(cardname)

    # A single legendary sideboard card is assumed to be the commander;
    # otherwise fall back to guessing from the card pool and deck name.
    commander = None
    if len(sideboard) == 1:
        cardname = list(sideboard)[0]
        card = core.lookup_card(cardname)
        if card.has_key('supertypes') and 'Legendary' in card['supertypes']:
            commander = list(sideboard)[0]
    if commander is None:
        commander = guess_commander(cards, text)

    out = {}
    out['url'] = url_str
    out['scrapedate'] = str(datetime.datetime.now())
    out['commander'] = commander
    out['cards'] = sorted(cards)
    out['ref'] = 'deckstats'
    return out
def tally(decks):
    """Average type counts, mana curve and color counts over decks.

    Returns a dict with per-deck averages for 'types', 'curve' (sorted
    items), 'colors', plus average 'nonlands' and implied 'lands'.
    NOTE: Python 2 integer division -- averages are floored.
    """
    types = {u'Creature': 0, u'Enchantment': 0, u'Sorcery': 0,
             u'Instant': 0, u'Artifact': 0, u'Planeswalker': 0}
    curve = {'0': 0, '1': 0, '2': 0, '3': 0, '4': 0,
             '5': 0, '6': 0, '7': 0, '8+': 0}
    colors = {u'Red': 0, u'Blue': 0, u'Green': 0, u'White': 0, u'Black': 0}
    nonland_counts = []
    c = 0
    for deck in decks:
        c += 1
        nonlands = 0
        for card in deck['cards']:
            cd = core.lookup_card(card)
            if cd is None:
                continue
            if not 'Land' in cd['types']:
                nonlands += 1
            for t in cd['types']:
                if not t in types.keys():
                    continue
                types[t] += 1
            if cd.has_key('cmc'):
                # Everything with converted cost 8 or more shares a bucket.
                if cd['cmc'] >= 8:
                    curve['8+'] += 1
                else:
                    curve[str(cd['cmc'])] += 1
            if cd.has_key('colors'):
                # Lands don't count toward the color breakdown.
                if u'Land' in cd['types']:
                    continue
                for col in cd['colors']:
                    colors[col] += 1
        nonland_counts.append(nonlands)

    # BUGFIX: an empty deck list used to raise ZeroDivisionError.
    if c == 0:
        return {'types': types, 'curve': sorted(curve.items()),
                'colors': colors, 'nonlands': 0, 'lands': 99}

    for key in types:
        types[key] /= c
    for key in curve:
        curve[key] /= c
    for key in colors:
        colors[key] /= c
    nonland_average = sum(nonland_counts) / len(nonland_counts)

    out = {}
    out['types'] = types
    out['curve'] = sorted(curve.items())
    out['colors'] = colors
    out['nonlands'] = nonland_average
    # EDH decks are 100 cards with the commander excluded here.
    out['lands'] = 99 - nonland_average
    return out
def rec(self, to=None, ref=None):
    """API endpoint: recommendations for a tappedout deck URL.

    Returns a JSON payload with 'recs' (additions) and 'cuts' (removals).
    """
    ip = cherrypy.request.remote.ip
    r = core.get_redis()
    cherrypy.response.headers["Access-Control-Allow-Origin"] = "*"
    # Simple per-IP rate limit: one call per 5 seconds.
    if r.exists('api' + str(ip)):
        return json.dumps('Too many API calls. Try again in a few seconds.')
    r.set('api' + str(ip), '', ex=5)
    if tappedout is None:
        return json.dumps(None)
    # BUGFIX: a missing URL or a failed scrape used to blow up further
    # down with TypeError; bail out cleanly instead.
    if to is None:
        return json.dumps(None)
    deck = tappedout.get_deck(to)
    if deck is None:
        return json.dumps(None)
    newrecs, outrecs = core.recommend(deck)
    # Keep only confident suggestions and attach full card info.
    newrecs = [{'cardname': cn, 'score': sc, 'card_info': core.lookup_card(cn)}
               for cn, sc in newrecs if sc > .3]
    outrecs = [{'cardname': cn, 'score': sc, 'card_info': core.lookup_card(cn)}
               for cn, sc in outrecs if sc > .5]
    deck['url'] = to
    if ref is not None:
        deck['ref'] = ref
    else:
        deck['ref'] = 'non-ref api call'
    deck['date'] = str(datetime.datetime.now())
    core.add_deck(deck)
    return json.dumps({'url': to, 'recs': newrecs, 'cuts': outrecs})
def guess_commander(cards, text=''):
    """Guess the commander of a card pool, optionally using the deck name.

    Candidates are legendary creatures whose color identity matches the
    pool's combined identity; ties are broken by word overlap between the
    card name and `text`. Raises ValueError when no candidate exists.
    """
    # Normalize the deck title: drop stopwords and non-alpha characters.
    # NOTE: replace() also hits substrings inside words ("command" ->
    # "comm"); kept for compatibility with existing matching behavior.
    text = text.lower().strip().replace('and', '').replace('or', '').replace('of', '').replace('the', '')
    text = ''.join(c for c in text if c.isalpha() or c == ' ')

    candidates = []
    colors = set()
    for cardname in cards:
        card = core.lookup_card(cardname)
        if card is None:
            logging.warn('ignoring this card %s because i couldnt find it' % cardname)
            continue
        try:
            if 'Legendary' in card['supertypes'] and 'Creature' in card['types']:
                candidates.append(cardname)
                colors = colors.union(set(core.color_identity(cardname)))
        except KeyError:
            continue

    colors = sorted(colors)
    # BUGFIX: sort each candidate's identity before comparing -- the
    # ordering returned by color_identity is not guaranteed to match the
    # sorted pool identity (other call sites sort both sides too).
    candidates = [cardname for cardname in candidates
                  if sorted(core.color_identity(cardname)) == colors]
    if len(candidates) == 0:
        raise ValueError("There is no good commander option for this pool of cards")
    if len(candidates) == 1:
        return candidates[0]

    # Several candidates: score each by how many (de-pluralized) words its
    # name shares with the deck title, and take the best.
    wordmatch = []
    texttokens = [k.rstrip('s') for k in text.split()]
    for cardname in candidates:
        ncardname = ''.join(c for c in cardname if c.isalpha() or c == ' ')
        tokens = [k.rstrip('s') for k in ncardname.split()]
        logging.debug(str(tokens) + ' vs. ' + str(texttokens) + ' (word match)')
        count = len([t for t in tokens if t in texttokens])
        wordmatch.append((count, cardname))
    wordmatch.sort(reverse=True)
    logging.debug("There are multiple candidates, these are the scores: %s" % str(wordmatch))
    return wordmatch[0][1]
def print_rec(rec,t): print '<table class="table table-striped table-condensed table-hover"><tr><th style="width: 20px;">Score</th><th style="width: 200px;">%s</th><th>Mana Cost</th></tr>' % (t) for i in range(20): manacost = core.lookup_card(rec[i][0])['manaCost'] try: l = '<tr><td style="text-align: center;"><span class="badge">%d</td><td>%s</td><td>%s</td></tr>' % (rec[i][1], linkify(rec[i][0]), manacost) except IndexError: l = ' ' if len(l) == 1: break print l print '</table>'
def scrape_deck(url_str):
    """Scrape a deck from an mtgsalvation forum post.

    The post embeds the deck as JSON in a <table class="deck"
    data-card-list="..."> element. Raises ValueError when no deck table is
    found or the first card is not legendary.
    """
    logging.debug('scraping a deck for %s' % url_str)
    content = urllib2.urlopen(url_str).read()
    parsed = bs.BeautifulSoup(content)
    tables = parsed.findAll('table')
    deck = []
    # Find the deck table.
    for t in tables:
        attrs = dict(t.attrs)
        # BUGFIX: tables without a class attribute used to raise KeyError.
        if attrs.get('class') != u'deck':
            continue
        data = json.loads(attrs['data-card-list'])
        num_cards = 0
        for card in data['Deck']:
            num_cards += card['Qty']
            deck.append(core.sanitize_cardname(card['CardName']))
        if num_cards < 95 or num_cards > 102:
            # Deck size check deliberately disabled for now:
            # raise ValueError("This deck has %d cards... that's bad." % num_cards)
            pass
        # The first listed card is expected to be the commander.
        if not core.lookup_card(deck[0]).has_key(u'supertypes') or \
                not u'Legendary' in core.lookup_card(deck[0])[u'supertypes']:
            raise ValueError("The first card in this deck is not legendary.")
        break
    else:
        raise ValueError("I couldn't find a deck in this post")

    out = {}
    out['url'] = url_str
    out['mtgsalvation'] = url_str
    out['date'] = datetime.datetime.now().toordinal()
    out['scrapedate'] = str(datetime.datetime.now())
    out['commander'] = deck[0]
    out['cards'] = sorted(deck)
    out['ref'] = 'mtgsalvation'
    return out
def cmdr(self, commander):
    """API endpoint: JSON recommendation payload for a commander.

    Results are cached in redis for 7 days.
    """
    commander = commander[:50]  # cap pathological input length
    cherrypy.response.headers['Access-Control-Allow-Origin'] = "*"
    r = core.get_redis()
    # BUGFIX: the cache key used to be built from the raw query string
    # before sanitizing, fragmenting the cache and defeating the
    # invalidation done by the rec endpoint (which keys on the sanitized
    # name). Canonicalize first, then build the key.
    commander = core.sanitize_cardname(commander)
    commander = closest_commander(commander)
    ckey = 'CACHE_COMMANDER_' + commander.replace(' ', '_')
    if r.exists(ckey):
        return r.get(ckey)
    colors = core.color_identity(commander)
    decks = [deck for deck in core.get_decks(colors)
             if deck['commander'] == commander]
    out = {}
    out['numdecks'] = len(decks)
    # First pass: register every card with a zero count and its info.
    cards = {}
    for deck in decks:
        for card in deck['cards']:
            cards[card] = {'count': 0, 'cardname': card,
                           'card_info': core.lookup_card(card)}
    # Second pass: count occurrences, skipping the commander and basics.
    for deck in decks:
        for card in deck['cards']:
            if card == commander:
                continue
            if card in ['swamp', 'island', 'mountain', 'forest', 'plains']:
                continue
            cards[card]['count'] += 1
    # Recommend cards that show up in >1 deck and >10% of decks.
    out['recs'] = [pp for pp in sorted(cards.values(),
                                       key=(lambda x: -1 * x['count']))
                   if pp['count'] > 1 and pp['count'] > .1 * len(decks)]
    out['commander'] = core.cap_cardname(commander)
    r.set(ckey, json.dumps(out), ex=60*60*24*7)  # 7 day cache
    return json.dumps(out)
def rec_by_type(deck, t):
    """Return (cardname, score) recommendations for `deck` of card type `t`.

    Scores are rescaled to integer percentages; only scores >= 30 are kept.
    """
    newrecs, outrecs = core.recommend(deck)
    rec = []
    for card, score in newrecs:
        # filter out basic lands from being recommendations
        if card in ['swamp', 'island', 'plains', 'mountain', 'forest']:
            continue
        # there is an annoying thing that happens when people use
        # snow-covered basics where edhrec will post basic lands as a
        # recommendation. this prevents that
        if score < .3:
            continue
        score = int(score * 100)  # make score easier to read
        try:
            types = core.lookup_card(card)['types']
        except Exception:
            # BUGFIX: was a bare "except:" which also caught SystemExit
            # and KeyboardInterrupt.
            logging.warn('something went wong with the card %s, ignoring it' % card)
            continue
        if t in types:
            rec.append((card, score))
    return rec
def seek_submissions(sublimit=200):
    """Scan recent r/edhrec+r/edh submissions and comment recommendations.

    Skips submissions already in the redis 'SEEN' set and those without a
    tappedout URL. In TESTING mode nothing is posted or persisted.
    """
    logging.debug('STARTING SUBMISSION SEEK AT ' + str(datetime.datetime.now()))
    # Scan edh and edhrec.
    subreddit = PRAW.get_subreddit('edhrec+edh').get_new(limit=sublimit)
    rds = core.get_redis()
    # For each submission in newness order...
    for submission in subreddit:
        # Check to see if I've scanned this already. If so, pass on it.
        if not TESTING:
            if rds.sismember('SEEN', submission.id):
                continue
        logging.debug("Scanning " + str(submission.id) + " - " + str(submission.title.encode('utf-8')))
        # Fetch the tappedout url; pass over the submission if there is none.
        url = find_tappedout_url(submission)
        if url is None:
            rds.sadd('SEEN', submission.id)
            continue
        # At this point, we have a deck we've never seen before! ~*~ GET EXCITED ~*~
        logging.debug("I found a URL to scrape: " + str(url))
        deck = tappedout.get_deck(url)
        if deck is None:
            logging.warning('Skipping this URL because something went wrong. (' + submission.title.encode('utf-8') + ')')
            rds.sadd('SEEN', submission.id)
            continue
        # Go get the recommendations and bucket them by card type.
        newrecs, outrecs = core.recommend(deck)
        lands = []
        creatures = []
        noncreatures = []
        for card, score in newrecs:
            # Filter out basic lands (snow-covered basics make them show
            # up as recommendations otherwise).
            if card in ['swamp', 'island', 'plains', 'mountain', 'forest']:
                continue
            if score < .3:
                continue
            score = int(score * 100)  # make score easier to read
            try:
                types = core.lookup_card(card)['types']
            except Exception:
                # BUGFIX: narrowed from a bare "except:".
                logging.warn('something went wong with the card %s, ignoring it' % card)
                continue
            if 'Creature' in types:
                creatures.append((card, score))
            elif 'Land' in types:
                lands.append((card, score))
            else:
                noncreatures.append((card, score))
        # Build the output string.
        # BUGFIX: out_str used to be undefined when no subreddit branch
        # matched, crashing at add_comment with NameError. Also removed a
        # dead elif for 'edh' already covered by the first condition.
        out_str = []
        if str(submission.subreddit).lower() in ['edhrec', 'edh']:
            out_str = ['Other decks like yours use:\n\nCreatures | Non-creatures | Lands | Unique in your deck\n:--------|:---------|:---------|:--------']
            for i in range(16):
                try:
                    c = '[%d] %s ' % (creatures[i][1], linkify(creatures[i][0]))
                except IndexError:
                    c = ' '
                try:
                    n = '[%d] %s ' % (noncreatures[i][1], linkify(noncreatures[i][0]))
                except IndexError:
                    n = ' '
                try:
                    l = '[%d] %s ' % (lands[i][1], linkify(lands[i][0]))
                except IndexError:
                    l = ' '
                try:
                    u = '%s ' % linkify(outrecs[i][0])
                except IndexError:
                    u = ' '
                # All three columns empty: nothing more to show.
                if len(c + n + l) == 3:
                    break
                out_str.append('%s | %s | %s | %s' % (c, n, l, u))
            out_str.append(BOT_NOTICE)
        # Post the comment!
        if not TESTING and out_str:
            submission.add_comment('\n'.join(out_str))
        logging.debug('comment i think I posted:\n' + '\n'.join(out_str))
        logging.debug("I posted a comment with recommendations!")
        # Keep track of the fact that I've now processed this deck.
        # It is important that this is last in case the scraping fails and
        # the problem is later fixed.
        if not TESTING:
            rds.sadd('SEEN', submission.id)
            core.add_deck(deck)
        sleep()
    logging.debug('DONE WITH SUBMISSION SEEK AT ' + str(datetime.datetime.now()))
def rec(self, to=None, ref=None):
    """API endpoint: scrape a deck URL and return JSON recommendations.

    Supports tappedout, mtgsalvation, deckstats and gracefulstats URLs.
    Responses are cached for 3 days keyed on (cards, commander).
    """
    # BUGFIX: a missing 'to' parameter used to raise TypeError on the
    # slice below; fail with a clear error instead.
    if to is None:
        raise ValueError('no deck url provided')
    to = to[:500].strip()
    if ref is None:
        ref = "No ref"
    ref = ref[:20].strip()
    cherrypy.response.headers['Content-Type'] = 'application/json'
    cherrypy.response.headers["Access-Control-Allow-Origin"] = "*"
    if not ('tappedout.net/mtg-decks' in to or 'mtgsalvation.com/forums/' in to
            or 'deckstats.net/deck' in to or 'gracefulstats.com/de' in to):
        raise ValueError(
            'invalid deck url %s . it should look like http://tappedout.net/mtg-decks/xxxx or http://www.mtgsalvation.com/forums/xxxx or http://deckstats.net/decks/xxxx/xxxx or http://www.gracefulstats.com/deck/view/xxxx' % to)
    ip = cherrypy.request.remote.ip
    r = core.get_redis()
    # Per-IP rate limit: one call per second.
    if r.exists('api' + str(ip)):
        logging.warn('%s ip is overloading' % str(ip))
        return json.dumps('Too many API calls. Try again in a few seconds.')
    r.set('api' + str(ip), '', ex=1)
    # Dispatch to the right scraper for the URL's host.
    deck = None
    if 'tappedout' in to:
        deck = tappedout.get_deck(to)
    elif 'mtgsalvation' in to:
        deck = mtgsalvation.scrape_deck(to)
    elif 'deckstats' in to:
        deck = deckstatscom.scrapedeck(to)
    elif 'gracefulstats' in to:
        deck = gracefulstats.scrapedeck(to)
    deck['scrapedate'] = str(datetime.datetime.now())
    # 'jedit ojanen' is the sentinel for "no commander could be found".
    if deck['commander'] == 'jedit ojanen':
        raise ValueError(
            'You input a deck without a valid commander. Please go back and add it to the web interface.')
    core.add_recent(to, core.cap_cardname(deck['commander']))
    # Serve a cached response when this exact deck was already processed.
    hashkey = 'CACHE_REC_' + core.hash_pyobj([deck['cards']] + [deck['commander']])
    if r.exists(hashkey):
        return r.get(hashkey)
    newrecs, outrecs, topk = core.recommend(deck, returnk=True)
    # Keep confident additions (>= .3) with minimal card info.
    outnewrecs = []
    for cn, sc in newrecs:
        if sc < .3:
            continue
        try:
            cd = {'score': sc,
                  'card_info': {'name': core.lookup_card(cn)['name'],
                                'types': core.lookup_card(cn)['types']}}
        except TypeError:
            logging.warn('The card %s failed to do lookup card.' % cn)
            continue
        outnewrecs.append(cd)
    # Keep confident cuts (>= .5).
    outoutrecs = []
    for cn, sc in outrecs:
        if sc < .5:
            continue
        try:
            cd = {'score': sc,
                  'card_info': {'name': core.lookup_card(cn)['name'],
                                'types': core.lookup_card(cn)['types']}}
        except TypeError:
            logging.warn('The card %s failed to do lookup card.' % cn)
            continue
        outoutrecs.append(cd)
    deck['url'] = to
    # ref was defaulted above, so it is never None here (the old
    # "if ref is not None" else-branch was dead code).
    deck['ref'] = ref
    deck['ip'] = str(ip)
    try:
        deck['headref'] = cherrypy.request.headerMap['Referer']
    except AttributeError:
        pass
    core.add_deck(deck)
    stats = deckstats.tally([deck])
    kstats = deckstats.tally(topk)
    cstats = deckstats.get_commander_stats(deck['commander'])
    output_json = json.dumps({'url': to, 'recs': outnewrecs, 'cuts': outoutrecs,
                              'stats': stats, 'kstats': kstats,
                              'cstats': cstats}, indent=4)
    r.set(hashkey, output_json, ex=60 * 60 * 24 * 3)  # 3 days expiration
    # Invalidate the commander page cache so the new deck is reflected.
    ckey = 'CACHE_COMMANDER_' + deck['commander'].replace(' ', '_')
    r.delete(ckey)
    return output_json
def cmdr(self, commander, nolog=False):
    """API endpoint: cached JSON recommendation payload for a commander.

    Logs the search per browser session unless nolog is set. Results are
    cached for 2 days.
    """
    commander = commander[:50]  # cap pathological input length
    cherrypy.response.headers['Content-Type'] = 'application/json'
    cherrypy.response.headers['Access-Control-Allow-Origin'] = "*"
    # BUGFIX: get_redis() was called twice; once is enough.
    r = core.get_redis()
    commander = core.sanitize_cardname(commander)
    commander = closest_commander(commander)
    # Tag each browser session so searches can be logged per-session.
    if not cherrypy.session.has_key('id'):
        cherrypy.session['id'] = ''.join(
            random.choice('0123456789abcdefghijklmnopqrstuvwxyz')
            for i in range(8))
    if not nolog:
        r.sadd("SESSION_CMDRSEARCH_" + cherrypy.session['id'], commander)
    ckey = 'CACHE_COMMANDER_' + commander.replace(' ', '_')
    if r.exists(ckey):
        return r.get(ckey)
    colors = core.color_identity(commander)
    decks = [deck for deck in core.get_decks(colors)
             if deck['commander'] == commander]
    if len(decks) < 3:
        return json.dumps({
            'error_code': 'NOT_ENOUGH_DATA',
            'message': 'There are not enough decks in my database to generate recommendations for %s' % commander
        })
    out = {}
    out['numdecks'] = len(decks)
    # First pass: register every card with a zero count and minimal info.
    cards = {}
    for deck in decks:
        for card in deck['cards']:
            try:
                cards[card] = {'count': 0,
                               'card_info': {'name': core.lookup_card(card)['name'],
                                             'types': core.lookup_card(card)['types'],
                                             'colors': core.lookup_card(card).get('colors', []),
                                             'cmc': core.lookup_card(card).get('cmc', 0)}}
            except TypeError:
                logging.warn("for some reason card %s could not be looked up, ignoring." % card)
                continue
    # Second pass: count occurrences, skipping the commander and basics.
    for deck in decks:
        for card in deck['cards']:
            if card == commander:
                continue
            if card in ['swamp', 'island', 'mountain', 'forest', 'plains']:
                continue
            try:
                cards[card]['count'] += 1
            except KeyError:
                continue
    # Top 125 cards that appear in more than one deck.
    out['recs'] = [pp for pp in sorted(cards.values(),
                                       key=(lambda x: -1 * x['count']))
                   if pp['count'] > 1][:125]
    out['commander'] = core.cap_cardname(commander)
    out['stats'] = deckstats.get_commander_stats(commander)
    # kmeans output for subtopics
    if len(decks) > 15:
        out['archetypes'] = kmeans.kmeans(commander)
    r.set(ckey, json.dumps(out), ex=60 * 60 * 24 * 2)  # 2 day cache
    return json.dumps(out)
def tally(decks):
    """Average type counts, mana curve and color counts over decks.

    Returns a dict with per-deck averages for 'types', 'curve' (sorted
    items), 'colors', plus average 'nonlands' and implied 'lands'.
    NOTE: Python 2 integer division -- averages are floored.
    """
    types = {u'Creature': 0, u'Enchantment': 0, u'Sorcery': 0,
             u'Instant': 0, u'Artifact': 0, u'Planeswalker': 0}
    curve = {
        '0': 0, '1': 0, '2': 0, '3': 0, '4': 0,
        '5': 0, '6': 0, '7': 0, '8+': 0
    }
    colors = {u'Red': 0, u'Blue': 0, u'Green': 0, u'White': 0, u'Black': 0}
    nonland_counts = []
    c = 0
    for deck in decks:
        c += 1
        nonlands = 0
        for card in deck['cards']:
            cd = core.lookup_card(card)
            if cd is None:
                continue
            if not 'Land' in cd['types']:
                nonlands += 1
            for t in cd['types']:
                if not t in types.keys():
                    continue
                types[t] += 1
            if cd.has_key('cmc'):
                # Everything with converted cost 8 or more shares a bucket.
                if cd['cmc'] >= 8:
                    curve['8+'] += 1
                else:
                    curve[str(cd['cmc'])] += 1
            if cd.has_key('colors'):
                # Lands don't count toward the color breakdown.
                if u'Land' in cd['types']:
                    continue
                for col in cd['colors']:
                    colors[col] += 1
        nonland_counts.append(nonlands)

    # BUGFIX: an empty deck list used to raise ZeroDivisionError.
    if c == 0:
        return {'types': types, 'curve': sorted(curve.items()),
                'colors': colors, 'nonlands': 0, 'lands': 99}

    for key in types:
        types[key] /= c
    for key in curve:
        curve[key] /= c
    for key in colors:
        colors[key] /= c
    nonland_average = sum(nonland_counts) / len(nonland_counts)

    out = {}
    out['types'] = types
    out['curve'] = sorted(curve.items())
    out['colors'] = colors
    out['nonlands'] = nonland_average
    # EDH decks are 100 cards with the commander excluded here.
    out['lands'] = 99 - nonland_average
    return out
def cmdr(self, commander, nolog=False):
    """API endpoint: cached JSON recommendation payload for a commander.

    Logs the search per browser session unless nolog is set. Results are
    cached for 2 days.
    """
    commander = commander[:50]  # cap pathological input length
    cherrypy.response.headers['Content-Type'] = 'application/json'
    cherrypy.response.headers['Access-Control-Allow-Origin'] = "*"
    # BUGFIX: get_redis() was called twice; once is enough.
    r = core.get_redis()
    commander = core.sanitize_cardname(commander)
    commander = closest_commander(commander)
    # Tag each browser session so searches can be logged per-session.
    if not cherrypy.session.has_key('id'):
        cherrypy.session['id'] = ''.join(
            random.choice('0123456789abcdefghijklmnopqrstuvwxyz')
            for i in range(8))
    if not nolog:
        r.sadd("SESSION_CMDRSEARCH_" + cherrypy.session['id'], commander)
    ckey = 'CACHE_COMMANDER_' + commander.replace(' ', '_')
    if r.exists(ckey):
        return r.get(ckey)
    colors = core.color_identity(commander)
    decks = [deck for deck in core.get_decks(colors)
             if deck['commander'] == commander]
    if len(decks) < 3:
        return json.dumps({
            'error_code': 'NOT_ENOUGH_DATA',
            'message': 'There are not enough decks in my database to generate recommendations for %s' % commander
        })
    out = {}
    out['numdecks'] = len(decks)
    # First pass: register every card with a zero count and minimal info.
    cards = {}
    for deck in decks:
        for card in deck['cards']:
            try:
                cards[card] = {'count': 0,
                               'card_info': {'name': core.lookup_card(card)['name'],
                                             'types': core.lookup_card(card)['types'],
                                             'colors': core.lookup_card(card).get('colors', []),
                                             'cmc': core.lookup_card(card).get('cmc', 0)}}
            except TypeError:
                logging.warn("for some reason card %s could not be looked up, ignoring." % card)
                continue
    # Second pass: count occurrences, skipping the commander and basics.
    for deck in decks:
        for card in deck['cards']:
            if card == commander:
                continue
            if card in ['swamp', 'island', 'mountain', 'forest', 'plains']:
                continue
            try:
                cards[card]['count'] += 1
            except KeyError:
                continue
    # Top 125 cards that appear in more than one deck.
    out['recs'] = [pp for pp in sorted(cards.values(),
                                       key=(lambda x: -1 * x['count']))
                   if pp['count'] > 1][:125]
    out['commander'] = core.cap_cardname(commander)
    out['stats'] = deckstats.get_commander_stats(commander)
    # kmeans output for subtopics
    if len(decks) > 15:
        out['archetypes'] = kmeans.kmeans(commander)
    r.set(ckey, json.dumps(out), ex=60*60*24*2)  # 2 day cache
    return json.dumps(out)
def kmeans(cmdr, k=4):
    """Cluster a commander's decks into k archetypes with k-means.

    Each deck becomes a binary card-incidence vector (basics, lands and
    the commander excluded). Returns a list of archetype dicts sorted by
    percentage of decks, each with 'defining' cards, top 'recs', counts
    and aggregate 'stats'.
    """
    cmdr = core.sanitize_cardname(cmdr)
    # Card name <-> column index for the deck/card incidence matrix.
    card_to_idx = {}
    idx_to_card = {}
    decks = []
    i = 0  # vocabulary size so far
    for deck in core.get_all_decks():
        if deck['commander'] != cmdr:
            continue
        # Grow the vocabulary with any new non-land, non-basic cards.
        for card in deck['cards']:
            if card in ['island', 'swamp', 'mountain', 'forest', 'plains', cmdr]:
                continue
            lo = core.lookup_card(card)
            if lo is None or 'Land' in lo['types']:
                continue
            if card_to_idx.has_key(card):
                continue
            card_to_idx[card] = i
            idx_to_card[i] = card
            i += 1
        # Binary feature vector for this deck (padded to full size below).
        ll = numpy.zeros(i, dtype=int)
        idxs = []
        for card in deck['cards']:
            try:
                idxs.append(card_to_idx[card])
            except KeyError:
                continue
        for idx in idxs:
            ll[idx] = 1
        decks.append(ll)
    # Earlier decks have shorter vectors; pad all to the final vocabulary.
    for idx, deck in enumerate(decks):
        decks[idx].resize(i, refcheck=False)
    decks = numpy.array(decks, dtype=int)
    kmc = sklearn.cluster.KMeans(n_clusters=k, init='k-means++', n_init=25,
                                 max_iter=300, tol=0.000001,
                                 precompute_distances=True, verbose=0,
                                 random_state=None, n_jobs=1)
    kmc.fit(decks)
    # Collect each cluster's member decks as card-name lists.
    clusters = [[] for _ in range(k)]
    out = []
    for idx, deck in enumerate(decks):
        # BUGFIX: the inner comprehension used to reuse 'idx', shadowing
        # the loop variable (Python 2 comprehensions leak their variable);
        # also dropped the unused 'dims' local and commented-out seed.
        clusters[kmc.labels_[idx]].append(
            [idx_to_card[jdx] for jdx, v in enumerate(deck) if v == 1])
    for idx, cluster in enumerate(kmc.cluster_centers_):
        outc = {}
        # Cards whose center weight most exceeds the other centers'
        # weights define this archetype.
        sumdiff = sum([cluster - other for other in kmc.cluster_centers_])
        defining = sorted(enumerate(sumdiff), key=lambda x: x[1], reverse=True)[:12]
        defining = [{'score': val,
                     'card_info': {'name': core.lookup_card(idx_to_card[jdx])['name'],
                                   'types': core.lookup_card(idx_to_card[jdx])['types'],
                                   'colors': core.lookup_card(idx_to_card[jdx]).get('colors', []),
                                   'cmc': core.lookup_card(idx_to_card[jdx]).get('cmc', 0)}}
                    for jdx, val in defining]
        # Top cards by raw center weight.
        topc = sorted([(val, idx_to_card[jdx]) for jdx, val in enumerate(cluster)],
                      reverse=True)[:125]
        topc = [{'score': val,
                 'card_info': {'name': core.lookup_card(card)['name'],
                               'types': core.lookup_card(card)['types'],
                               'colors': core.lookup_card(card).get('colors', []),
                               'cmc': core.lookup_card(card).get('cmc', 0)}}
                for val, card in topc]
        outc['defining'] = defining
        outc['recs'] = topc
        outc['numdecks'] = len(clusters[idx])
        outc['percentdecks'] = int(len(clusters[idx]) / float(len(decks)) * 100)
        outc['commander'] = cmdr
        outc['stats'] = deckstats.tally([{'cards': d} for d in clusters[idx]])
        out.append(outc)
    return sorted(out, key=lambda x: x['percentdecks'], reverse=True)
def seek_submissions(sublimit=200):
    """Scan recent r/edhrec+r/edh submissions and comment recommendations.

    Skips submissions already in the redis 'SEEN' set and those without a
    tappedout URL. In TESTING mode nothing is posted or persisted.
    """
    logging.debug('STARTING SUBMISSION SEEK AT ' + str(datetime.datetime.now()))
    # Scan edh and edhrec.
    subreddit = PRAW.get_subreddit('edhrec+edh').get_new(limit=sublimit)
    rds = core.get_redis()
    # For each submission in newness order...
    for submission in subreddit:
        # Check to see if I've scanned this already. If so, pass on it.
        if not TESTING:
            if rds.sismember('SEEN', submission.id):
                continue
        logging.debug("Scanning " + str(submission.id) + " - " + str(submission.title.encode('utf-8')))
        # Fetch the tappedout url; pass over the submission if there is none.
        url = find_tappedout_url(submission)
        if url is None:
            rds.sadd('SEEN', submission.id)
            continue
        # At this point, we have a deck we've never seen before! ~*~ GET EXCITED ~*~
        logging.debug("I found a URL to scrape: " + str(url))
        deck = tappedout.get_deck(url)
        if deck is None:
            logging.warning(
                'Skipping this URL because something went wrong. (' +
                submission.title.encode('utf-8') + ')')
            rds.sadd('SEEN', submission.id)
            continue
        # Go get the recommendations and bucket them by card type.
        newrecs, outrecs = core.recommend(deck)
        lands = []
        creatures = []
        noncreatures = []
        for card, score in newrecs:
            # Filter out basic lands (snow-covered basics make them show
            # up as recommendations otherwise).
            if card in ['swamp', 'island', 'plains', 'mountain', 'forest']:
                continue
            if score < .3:
                continue
            score = int(score * 100)  # make score easier to read
            try:
                types = core.lookup_card(card)['types']
            except Exception:
                # BUGFIX: narrowed from a bare "except:".
                logging.warn(
                    'something went wong with the card %s, ignoring it' % card)
                continue
            if 'Creature' in types:
                creatures.append((card, score))
            elif 'Land' in types:
                lands.append((card, score))
            else:
                noncreatures.append((card, score))
        # Build the output string.
        # BUGFIX: out_str used to be undefined when no subreddit branch
        # matched, crashing at add_comment with NameError. Also removed a
        # dead elif for 'edh' already covered by the first condition.
        out_str = []
        if str(submission.subreddit).lower() in ['edhrec', 'edh']:
            out_str = [
                'Other decks like yours use:\n\nCreatures | Non-creatures | Lands | Unique in your deck\n:--------|:---------|:---------|:--------'
            ]
            for i in range(16):
                try:
                    c = '[%d] %s ' % (creatures[i][1], linkify(creatures[i][0]))
                except IndexError:
                    c = ' '
                try:
                    n = '[%d] %s ' % (noncreatures[i][1], linkify(noncreatures[i][0]))
                except IndexError:
                    n = ' '
                try:
                    l = '[%d] %s ' % (lands[i][1], linkify(lands[i][0]))
                except IndexError:
                    l = ' '
                try:
                    u = '%s ' % linkify(outrecs[i][0])
                except IndexError:
                    u = ' '
                # All three columns empty: nothing more to show.
                if len(c + n + l) == 3:
                    break
                out_str.append('%s | %s | %s | %s' % (c, n, l, u))
            out_str.append(BOT_NOTICE)
        # Post the comment!
        if not TESTING and out_str:
            submission.add_comment('\n'.join(out_str))
        logging.debug('comment i think I posted:\n' + '\n'.join(out_str))
        logging.debug("I posted a comment with recommendations!")
        # Keep track of the fact that I've now processed this deck.
        # It is important that this is last in case the scraping fails and
        # the problem is later fixed.
        if not TESTING:
            rds.sadd('SEEN', submission.id)
            core.add_deck(deck)
        sleep()
    logging.debug('DONE WITH SUBMISSION SEEK AT ' + str(datetime.datetime.now()))
def rec(self, to=None, ref=None):
    """API endpoint: scrape a deck URL and return JSON recommendations.

    Supports tappedout, mtgsalvation, deckstats and gracefulstats URLs.
    Responses are cached for 3 days keyed on (cards, commander).
    """
    # BUGFIX: a missing 'to' parameter used to raise TypeError on the
    # slice below; fail with a clear error instead.
    if to is None:
        raise ValueError('no deck url provided')
    to = to[:500].strip()
    if ref is None:
        ref = "No ref"
    ref = ref[:20].strip()
    cherrypy.response.headers['Content-Type'] = 'application/json'
    cherrypy.response.headers["Access-Control-Allow-Origin"] = "*"
    if not ('tappedout.net/mtg-decks' in to or 'mtgsalvation.com/forums/' in to
            or 'deckstats.net/deck' in to or 'gracefulstats.com/de' in to):
        raise ValueError('invalid deck url %s . it should look like http://tappedout.net/mtg-decks/xxxx or http://www.mtgsalvation.com/forums/xxxx or http://deckstats.net/decks/xxxx/xxxx or http://www.gracefulstats.com/deck/view/xxxx' % to)
    ip = cherrypy.request.remote.ip
    r = core.get_redis()
    # Per-IP rate limit: one call per second.
    if r.exists('api' + str(ip)):
        logging.warn('%s ip is overloading' % str(ip))
        return json.dumps('Too many API calls. Try again in a few seconds.')
    r.set('api' + str(ip), '', ex=1)
    # Dispatch to the right scraper for the URL's host.
    deck = None
    if 'tappedout' in to:
        deck = tappedout.get_deck(to)
    elif 'mtgsalvation' in to:
        deck = mtgsalvation.scrape_deck(to)
    elif 'deckstats' in to:
        deck = deckstatscom.scrapedeck(to)
    elif 'gracefulstats' in to:
        deck = gracefulstats.scrapedeck(to)
    deck['scrapedate'] = str(datetime.datetime.now())
    # 'jedit ojanen' is the sentinel for "no commander could be found".
    if deck['commander'] == 'jedit ojanen':
        raise ValueError('You input a deck without a valid commander. Please go back and add it to the web interface.')
    core.add_recent(to, core.cap_cardname(deck['commander']))
    # Serve a cached response when this exact deck was already processed.
    hashkey = 'CACHE_REC_' + core.hash_pyobj([deck['cards']] + [deck['commander']])
    if r.exists(hashkey):
        return r.get(hashkey)
    newrecs, outrecs, topk = core.recommend(deck, returnk=True)
    # Keep confident additions (>= .3) with minimal card info.
    outnewrecs = []
    for cn, sc in newrecs:
        if sc < .3:
            continue
        try:
            cd = {'score': sc,
                  'card_info': {'name': core.lookup_card(cn)['name'],
                                'types': core.lookup_card(cn)['types']}}
        except TypeError:
            logging.warn('The card %s failed to do lookup card.' % cn)
            continue
        outnewrecs.append(cd)
    # Keep confident cuts (>= .5).
    outoutrecs = []
    for cn, sc in outrecs:
        if sc < .5:
            continue
        try:
            cd = {'score': sc,
                  'card_info': {'name': core.lookup_card(cn)['name'],
                                'types': core.lookup_card(cn)['types']}}
        except TypeError:
            logging.warn('The card %s failed to do lookup card.' % cn)
            continue
        outoutrecs.append(cd)
    deck['url'] = to
    # ref was defaulted above, so it is never None here (the old
    # "if ref is not None" else-branch was dead code).
    deck['ref'] = ref
    deck['ip'] = str(ip)
    try:
        deck['headref'] = cherrypy.request.headerMap['Referer']
    except AttributeError:
        pass
    core.add_deck(deck)
    stats = deckstats.tally([deck])
    kstats = deckstats.tally(topk)
    cstats = deckstats.get_commander_stats(deck['commander'])
    output_json = json.dumps({'url': to, 'recs': outnewrecs, 'cuts': outoutrecs,
                              'stats': stats, 'kstats': kstats,
                              'cstats': cstats}, indent=4)
    r.set(hashkey, output_json, ex=60*60*24*3)  # 3 days expiration
    # Invalidate the commander page cache so the new deck is reflected.
    ckey = 'CACHE_COMMANDER_' + deck['commander'].replace(' ', '_')
    r.delete(ckey)
    return output_json
def rec(self, to=None, ref=None):
    """API endpoint (tappedout-only): JSON recommendations for a deck URL.

    Responses are cached for 3 days keyed on (cards, commander).
    """
    # BUGFIX: a missing 'to' parameter used to raise TypeError on the
    # slice below; fail with a clear error instead.
    if to is None:
        raise ValueError('no deck url provided')
    to = to[:500]
    # BUGFIX: ref used to be clobbered with the first 20 characters of the
    # URL ("ref = to[:20]"); truncate the actual ref, preserving None so
    # the non-ref default below still applies.
    if ref is not None:
        ref = ref[:20]
    if not 'tappedout.net/mtg-decks' in to:
        raise ValueError('invalid deck url %s . it should look like http://tappedout.net/mtg-decks/xxxx' % to)
    ip = cherrypy.request.remote.ip
    r = core.get_redis()
    cherrypy.response.headers["Access-Control-Allow-Origin"] = "*"
    # Per-IP rate limit: one call per second.
    if r.exists('api' + str(ip)):
        logging.warn('%s ip is overloading' % str(ip))
        return json.dumps('Too many API calls. Try again in a few seconds.')
    r.set('api' + str(ip), '', ex=1)
    if tappedout is None:
        return json.dumps(None)
    deck = tappedout.get_deck(to)
    # 'jedit ojanen' is the sentinel for "no commander could be found".
    if deck['commander'] == 'jedit ojanen':
        raise ValueError('You input a deck without a valid commander. Please go back and add it to the web interface.')
    core.add_recent(to, core.cap_cardname(deck['commander']))
    # Serve a cached response when this exact deck was already processed.
    hashkey = 'CACHE_REC_' + core.hash_pyobj([deck['cards']] + [deck['commander']])
    if r.exists(hashkey):
        return r.get(hashkey)
    newrecs, outrecs = core.recommend(deck)
    # Keep only confident suggestions and attach full card info.
    newrecs = [{'cardname': cn, 'score': sc, 'card_info': core.lookup_card(cn)}
               for cn, sc in newrecs if sc > .3]
    outrecs = [{'cardname': cn, 'score': sc, 'card_info': core.lookup_card(cn)}
               for cn, sc in outrecs if sc > .5]
    deck['url'] = to
    if ref is not None:
        deck['ref'] = ref
    else:
        deck['ref'] = 'non-ref api call'
    deck['ip'] = str(ip)
    try:
        deck['headref'] = cherrypy.request.headerMap['Referer']
    except AttributeError:
        pass
    deck['scrapedate'] = str(datetime.datetime.now())
    core.add_deck(deck)
    output_json = json.dumps({'url': to, 'recs': newrecs, 'cuts': outrecs})
    r.set(hashkey, output_json, ex=60*60*24*3)  # 3 days expiration
    return output_json