def load_cards_from_json(file_path):
    r = core.get_redis()
    writecount = 0
    # Go through each card in the set...
    for card, conts in json.load(open(file_path)).items():
        # If it has names, it's a split card or fuse card or something
        if 'names' in conts:
            cardname = core.sanitize_cardname('/'.join(conts['names'])).lower()
            for name in conts['names']:
                r.hset('CARDS_JSON', core.sanitize_cardname(name), json.dumps(conts))
            r.hset('CARDS_JSON', core.sanitize_cardname(cardname), json.dumps(conts))
            r.hset('CARDS_JSON', core.sanitize_cardname(cardname.replace('/', ' // ')), json.dumps(conts))
            r.hset('CARDS_JSON', core.sanitize_cardname(cardname.replace('/', ' / ')), json.dumps(conts))
        else:
            cardname = core.sanitize_cardname(conts['name']).lower()
            r.hset('CARDS_JSON', core.sanitize_cardname(cardname), json.dumps(conts))
        writecount += 1
    logging.debug('We just wrote ' + str(writecount) + ' card entries into Redis.')

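# A minimal read-back sketch for the CARDS_JSON hash written above, assuming
# the same sanitization is applied on read as on write. lookup_card_raw is a
# hypothetical helper for illustration, not part of core:
def lookup_card_raw(name):
    raw = core.get_redis().hget('CARDS_JSON', core.sanitize_cardname(name))
    return json.loads(raw) if raw is not None else None
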
def stats(self):
    cherrypy.response.headers['Access-Control-Allow-Origin'] = "*"
    r = core.get_redis()
    ckey = 'CACHE_STATS'
    if r.exists(ckey):
        return r.get(ckey)
    out = {}
    m_counts = {}
    for d in core.get_all_decks():
        if 'scrapedate' not in d:
            continue
        if (datetime.datetime.now() - core.date_from_str(d['scrapedate'])).days <= 30:
            m_counts.setdefault(core.cap_cardname(d['commander']), 0)
            m_counts[core.cap_cardname(d['commander'])] += 1
    out['topmonth'] = sorted(m_counts.items(), key=lambda x: x[1], reverse=True)[:25]
    alltime_counts = {}
    for d in core.get_all_decks():
        alltime_counts.setdefault(core.cap_cardname(d['commander']), 0)
        alltime_counts[core.cap_cardname(d['commander'])] += 1
    out['topalltime'] = sorted(alltime_counts.items(), key=lambda x: x[1], reverse=True)[:75]
    out['deckcount'] = len(core.get_all_decks())
    r.set(ckey, json.dumps(out), ex=60 * 60 * 3)  # 3 hour cache
    return json.dumps(out)

def randomcmdr(self):
    cherrypy.response.headers['Access-Control-Allow-Origin'] = "*"
    cherrypy.response.headers['Content-Type'] = 'application/json'
    r = core.get_redis()
    ckey = 'CACHE_COMMANDER_COUNTS'
    o = r.get(ckey)
    if o is None:
        alltime_counts = {}
        for d in core.get_all_decks():
            alltime_counts.setdefault(d['commander'], 0)
            alltime_counts[d['commander']] += 1
        options = [cmdr for cmdr, cnt in alltime_counts.items() if cnt > 4]
        r.set(ckey, json.dumps(options), ex=60 * 60 * 24 * 5)  # 5 day cache
    else:
        options = json.loads(o)
    return self.cmdr(random.choice(options), nolog=True)

from aiohttp import web
from aiohttp_session import get_session


@web.middleware  # new-style middleware; assumes aiohttp >= 3
async def csrf_middleware(request, handler):
    async with get_redis() as redis:
        csrf_token = await redis.get('csrf_token')
    session = await get_session(request)
    # Only methods listed in CSRF_CONF['methods'] pass, and only when the
    # session token matches the stored token; everything else is rejected.
    if request.method in CSRF_CONF['methods'] and session.get('csrf_token') == csrf_token:
        return await handler(request)
    return web.json_response({'status': '403', 'msg': 'forbidden'}, status=403)

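# Hypothetical wiring sketch for the middleware above, assuming aiohttp >= 3
# and aiohttp_session. SimpleCookieStorage is development-only storage; the
# session middleware must be installed before csrf_middleware so that
# get_session() can find the storage on the request.
from aiohttp import web
from aiohttp_session import setup as session_setup, SimpleCookieStorage

app = web.Application()
session_setup(app, SimpleCookieStorage())
app.middlewares.append(csrf_middleware)
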
def stats(self):
    cherrypy.response.headers['Access-Control-Allow-Origin'] = "*"
    cherrypy.response.headers['Content-Type'] = 'application/json'
    r = core.get_redis()
    ckey = 'CACHE_STATS'
    if r.exists(ckey):
        return r.get(ckey)
    out = {}
    w_counts = {}
    m_counts = {}
    for d in core.get_all_decks():
        if 'scrapedate' not in d:
            continue
        try:
            # Skip decks from the 2014-11-28 mtgsalvation mass load so it
            # doesn't skew the stats.
            if (d['scrapedate'] < '2014-11-28 10:52:53.525961'
                    and d['scrapedate'] > '2014-11-28 03:52:53.525961'
                    and d['ref'] == 'mtgsalvation'):
                continue
        except KeyError:
            pass
        datedelta = (datetime.datetime.now() - core.date_from_str(d['scrapedate'])).days
        if datedelta <= 30:
            m_counts.setdefault(core.cap_cardname(d['commander']), 0)
            m_counts[core.cap_cardname(d['commander'])] += 1
        if datedelta <= 7:
            w_counts.setdefault(core.cap_cardname(d['commander']), 0)
            w_counts[core.cap_cardname(d['commander'])] += 1
    out['topweek'] = sorted(w_counts.items(), key=lambda x: x[1], reverse=True)[:25]
    out['topmonth'] = sorted(m_counts.items(), key=lambda x: x[1], reverse=True)[:25]
    alltime_counts = {}
    for d in core.get_all_decks():
        alltime_counts.setdefault(core.cap_cardname(d['commander']), 0)
        alltime_counts[core.cap_cardname(d['commander'])] += 1
    out['topalltime'] = sorted(alltime_counts.items(), key=lambda x: x[1], reverse=True)[:25]
    out['deckcount'] = len(core.get_all_decks())
    r.set(ckey, json.dumps(out), ex=60 * 60 * 3)  # 3 hour cache
    return json.dumps(out)

def onetimescrape():
    for link in frontpages(startpage=1, endpage=100):
        try:
            url = 'http://www.mtgsalvation.com' + link
            cachekey = 'CACHE_MTGSALVATION_%s' % url
            if core.get_redis().get(cachekey) is not None:
                continue
            core.get_redis().set(cachekey, str(datetime.datetime.now()), ex=60 * 60 * 4)  # 4 hour cache
            deck = scrape_deck(url)
            core.add_deck(deck)
            #core.add_recent(url, core.cap_cardname(deck['commander']))
            logging.debug("added a deck, yay! %s" % deck['commander'])
        except Exception as e:
            logging.debug('for "%s" : %s' % (url, e))

def cmdr(self, commander):
    commander = commander[:50]
    cherrypy.response.headers['Access-Control-Allow-Origin'] = "*"
    r = core.get_redis()
    ckey = 'CACHE_COMMANDER_' + commander.replace(' ', '_')
    if r.exists(ckey):
        return r.get(ckey)
    commander = core.sanitize_cardname(commander)
    commander = closest_commander(commander)
    colors = core.color_identity(commander)
    decks = [deck for deck in core.get_decks(colors) if deck['commander'] == commander]
    out = {}
    out['numdecks'] = len(decks)
    cards = {}
    for deck in decks:
        for card in deck['cards']:
            cards[card] = {'count': 0, 'cardname': card, 'card_info': core.lookup_card(card)}
    for deck in decks:
        for card in deck['cards']:
            if card == commander:
                continue
            if card in ['swamp', 'island', 'mountain', 'forest', 'plains']:
                continue
            cards[card]['count'] += 1
    out['recs'] = [pp for pp in sorted(cards.values(), key=lambda x: -1 * x['count'])
                   if pp['count'] > 1 and pp['count'] > .1 * len(decks)]
    out['commander'] = core.cap_cardname(commander)
    r.set(ckey, json.dumps(out), ex=60 * 60 * 24 * 7)  # 7 day cache
    return json.dumps(out)

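# closest_commander is used above but not defined here. A plausible sketch
# using difflib fuzzy matching; all_commander_names() is a hypothetical
# helper standing in for however the real code enumerates known commanders:
import difflib

def closest_commander(name):
    candidates = all_commander_names()  # hypothetical: list of sanitized names
    matches = difflib.get_close_matches(name, candidates, n=1, cutoff=0.6)
    return matches[0] if matches else name
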
def rec(self, to=None, ref=None):
    ip = cherrypy.request.remote.ip
    r = core.get_redis()
    cherrypy.response.headers["Access-Control-Allow-Origin"] = "*"
    if r.exists('api' + str(ip)):
        return json.dumps('Too many API calls. Try again in a few seconds.')
    r.set('api' + str(ip), '', ex=5)
    if tappedout is None:
        return json.dumps(None)
    deck = tappedout.get_deck(to)
    newrecs, outrecs = core.recommend(deck)
    newrecs = [{'cardname': cn, 'score': sc, 'card_info': core.lookup_card(cn)}
               for cn, sc in newrecs if sc > .3]
    outrecs = [{'cardname': cn, 'score': sc, 'card_info': core.lookup_card(cn)}
               for cn, sc in outrecs if sc > .5]
    deck['url'] = to
    if ref is not None:
        deck['ref'] = ref
    else:
        deck['ref'] = 'non-ref api call'
    deck['date'] = str(datetime.datetime.now())
    core.add_deck(deck)
    return json.dumps({'url': to, 'recs': newrecs, 'cuts': outrecs})

def cmdr(self, commander, nolog=False):
    commander = commander[:50]
    cherrypy.response.headers['Content-Type'] = 'application/json'
    cherrypy.response.headers['Access-Control-Allow-Origin'] = "*"
    r = core.get_redis()
    commander = core.sanitize_cardname(commander)
    commander = closest_commander(commander)
    if 'id' not in cherrypy.session:
        cherrypy.session['id'] = ''.join(
            random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for i in range(8))
    if not nolog:
        r.sadd("SESSION_CMDRSEARCH_" + cherrypy.session['id'], commander)
    ckey = 'CACHE_COMMANDER_' + commander.replace(' ', '_')
    if r.exists(ckey):
        return r.get(ckey)
    colors = core.color_identity(commander)
    decks = [deck for deck in core.get_decks(colors) if deck['commander'] == commander]
    if len(decks) < 3:
        return json.dumps({
            'error_code': 'NOT_ENOUGH_DATA',
            'message': 'There are not enough decks in my database to generate recommendations for %s' % commander
        })
    out = {}
    out['numdecks'] = len(decks)
    cards = {}
    for deck in decks:
        for card in deck['cards']:
            try:
                info = core.lookup_card(card)
                cards[card] = {'count': 0,
                               'card_info': {'name': info['name'],
                                             'types': info['types'],
                                             'colors': info.get('colors', []),
                                             'cmc': info.get('cmc', 0)}}
            except TypeError:
                logging.warn("for some reason card %s could not be looked up, ignoring." % card)
                continue
    for deck in decks:
        for card in deck['cards']:
            if card == commander:
                continue
            if card in ['swamp', 'island', 'mountain', 'forest', 'plains']:
                continue
            try:
                cards[card]['count'] += 1
            except KeyError:
                continue
    #out['recs'] = [ pp for pp in sorted(cards.values(), key=(lambda x: -1 * x['count'])) if pp['count'] > 1 and pp['count'] > .1 * len(decks) ]
    out['recs'] = [pp for pp in sorted(cards.values(), key=lambda x: -1 * x['count'])
                   if pp['count'] > 1][:125]
    out['commander'] = core.cap_cardname(commander)
    out['stats'] = deckstats.get_commander_stats(commander)
    # kmeans output for subtopics
    if len(decks) > 15:
        out['archetypes'] = kmeans.kmeans(commander)
    r.set(ckey, json.dumps(out), ex=60 * 60 * 24 * 2)  # 2 day cache
    return json.dumps(out)

load_cards_from_json('AllCards.json')

for deck in open('decks_sample.json').readlines():
    core.add_deck(json.loads(deck))

for cc in [core.sanitize_cardname(c) for c in open('banlist.txt').read().strip().split('\n')]:
    core.get_redis().sadd('BANNED', cc)

import core
import json

r = core.get_redis()
strs = r.keys('DECKS_*')
for s in strs:
    ds = core.get_decks(s, dedup=True)
    dds = core.dedup_decks(ds)
    r.delete('DDD_' + s)
    for deck in dds:
        r.rpush('DDD_' + s, json.dumps(deck))
    r.delete('OLD_' + s)
    r.rename(s, 'OLD_' + s)
    r.rename('DDD_' + s, s)

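# Note on the swap above: the deduped list is built under a temporary DDD_ key,
# the live list is kept under OLD_ as a backup, and the final RENAME makes the
# deduped list live. Each RENAME is atomic in Redis, but there is a brief window
# between the two renames where the live key does not exist, so readers should
# tolerate a missing key during this migration.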
if __name__ == '__main__':
    for link in frontpage(page=1):
        try:
            url = 'http://www.mtgsalvation.com' + link
            cachekey = 'CACHE_MTGSALVATION_%s' % url
            if core.get_redis().get(cachekey) is not None:
                continue
            core.get_redis().set(cachekey, str(datetime.datetime.now()), ex=60 * 60 * 24 * 3)  # 3 day cache
            deck = scrape_deck(url)
            core.add_deck(deck)
            core.add_recent(url, core.cap_cardname(deck['commander']))
        except Exception as e:
            logging.debug('for "%s" : %s' % (url, e))

def rec(self, to=None, ref=None):
    to = to[:500]
    if ref is not None:
        ref = ref[:20]
    if 'tappedout.net/mtg-decks' not in to:
        raise ValueError('Invalid deck url %s. It should look like http://tappedout.net/mtg-decks/xxxx' % to)
    ip = cherrypy.request.remote.ip
    r = core.get_redis()
    cherrypy.response.headers["Access-Control-Allow-Origin"] = "*"
    if r.exists('api' + str(ip)):
        logging.warn('%s ip is overloading' % str(ip))
        return json.dumps('Too many API calls. Try again in a few seconds.')
    r.set('api' + str(ip), '', ex=1)
    if tappedout is None:
        return json.dumps(None)
    deck = tappedout.get_deck(to)
    if deck['commander'] == 'jedit ojanen':
        raise ValueError('You input a deck without a valid commander. Please go back and add it to the web interface.')
    core.add_recent(to, core.cap_cardname(deck['commander']))
    hashkey = 'CACHE_REC_' + core.hash_pyobj([deck['cards']] + [deck['commander']])
    if r.exists(hashkey):
        return r.get(hashkey)
    newrecs, outrecs = core.recommend(deck)
    newrecs = [{'cardname': cn, 'score': sc, 'card_info': core.lookup_card(cn)}
               for cn, sc in newrecs if sc > .3]
    outrecs = [{'cardname': cn, 'score': sc, 'card_info': core.lookup_card(cn)}
               for cn, sc in outrecs if sc > .5]
    deck['url'] = to
    if ref is not None:
        deck['ref'] = ref
    else:
        deck['ref'] = 'non-ref api call'
    deck['ip'] = str(ip)
    try:
        deck['headref'] = cherrypy.request.headerMap['Referer']
    except AttributeError:
        pass
    deck['scrapedate'] = str(datetime.datetime.now())
    core.add_deck(deck)
    output_json = json.dumps({'url': to, 'recs': newrecs, 'cuts': outrecs})
    r.set(hashkey, output_json, ex=60 * 60 * 24 * 3)  # 3 day expiration
    return output_json

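# core.hash_pyobj is used above to build a stable cache key but is not shown.
# A minimal stand-in consistent with that usage (serialize deterministically,
# then hash); this is an assumption, not the project's actual implementation:
import hashlib

def hash_pyobj(obj):
    payload = json.dumps(obj, sort_keys=True).encode('utf-8')
    return hashlib.sha1(payload).hexdigest()
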
def rec(self, to=None, ref=None):
    to = to[:500].strip()
    if ref is None:
        ref = "No ref"
    ref = ref[:20].strip()
    cherrypy.response.headers['Content-Type'] = 'application/json'
    cherrypy.response.headers["Access-Control-Allow-Origin"] = "*"
    if not ('tappedout.net/mtg-decks' in to or 'mtgsalvation.com/forums/' in to
            or 'deckstats.net/deck' in to or 'gracefulstats.com/de' in to):
        raise ValueError('Invalid deck url %s. It should look like '
                         'http://tappedout.net/mtg-decks/xxxx or '
                         'http://www.mtgsalvation.com/forums/xxxx or '
                         'http://deckstats.net/decks/xxxx/xxxx or '
                         'http://www.gracefulstats.com/deck/view/xxxx' % to)
    ip = cherrypy.request.remote.ip
    r = core.get_redis()
    if r.exists('api' + str(ip)):
        logging.warn('%s ip is overloading' % str(ip))
        return json.dumps('Too many API calls. Try again in a few seconds.')
    r.set('api' + str(ip), '', ex=1)
    deck = None
    if 'tappedout' in to:
        deck = tappedout.get_deck(to)
    elif 'mtgsalvation' in to:
        deck = mtgsalvation.scrape_deck(to)
    elif 'deckstats' in to:
        deck = deckstatscom.scrapedeck(to)
    elif 'gracefulstats' in to:
        deck = gracefulstats.scrapedeck(to)
    deck['scrapedate'] = str(datetime.datetime.now())
    if deck['commander'] == 'jedit ojanen':
        raise ValueError('You input a deck without a valid commander. Please go back and add it to the web interface.')
    core.add_recent(to, core.cap_cardname(deck['commander']))
    hashkey = 'CACHE_REC_' + core.hash_pyobj([deck['cards']] + [deck['commander']])
    if r.exists(hashkey):
        return r.get(hashkey)
    newrecs, outrecs, topk = core.recommend(deck, returnk=True)
    outnewrecs = []
    for cn, sc in newrecs:
        if sc < .3:
            continue
        try:
            info = core.lookup_card(cn)
            cd = {'score': sc, 'card_info': {'name': info['name'], 'types': info['types']}}
        except TypeError:
            logging.warn('The card %s failed to do lookup card.' % cn)
            continue
        outnewrecs.append(cd)
    outoutrecs = []
    for cn, sc in outrecs:
        if sc < .5:
            continue
        try:
            info = core.lookup_card(cn)
            cd = {'score': sc, 'card_info': {'name': info['name'], 'types': info['types']}}
        except TypeError:
            logging.warn('The card %s failed to do lookup card.' % cn)
            continue
        outoutrecs.append(cd)
    deck['url'] = to
    deck['ref'] = ref  # ref was defaulted above, so it is never None here
    deck['ip'] = str(ip)
    try:
        deck['headref'] = cherrypy.request.headerMap['Referer']
    except AttributeError:
        pass
    core.add_deck(deck)
    stats = deckstats.tally([deck])
    kstats = deckstats.tally(topk)
    cstats = deckstats.get_commander_stats(deck['commander'])
    output_json = json.dumps({'url': to, 'recs': outnewrecs, 'cuts': outoutrecs,
                              'stats': stats, 'kstats': kstats, 'cstats': cstats}, indent=4)
    r.set(hashkey, output_json, ex=60 * 60 * 24 * 3)  # 3 day expiration
    ckey = 'CACHE_COMMANDER_' + deck['commander'].replace(' ', '_')
    r.delete(ckey)  # invalidate the commander page cache so the new deck shows up
    return output_json

def seek_submissions(sublimit=200):
    logging.debug('STARTING SUBMISSION SEEK AT ' + str(datetime.datetime.now()))
    # Scan edh and edhrec
    subreddit = PRAW.get_subreddit('edhrec+edh').get_new(limit=sublimit)
    rds = core.get_redis()
    # For each submission in newness order...
    for submission in subreddit:
        # Check to see if I've scanned this already. If so, pass on it.
        if not TESTING:
            if rds.sismember('SEEN', submission.id):
                continue
        logging.debug("Scanning " + str(submission.id) + " - " + str(submission.title.encode('utf-8')))
        # Fetch the tappedout url
        url = find_tappedout_url(submission)
        # If there was no tappedout URL, then let's pass over this one.
        if url is None:
            rds.sadd('SEEN', submission.id)
            continue
        ## At this point, we have a deck we've never seen before that has been posted!
        #
        # ~*~ GET EXCITED ~*~
        logging.debug("I found a URL to scrape: " + str(url))
        # Scrape it
        deck = tappedout.get_deck(url)
        if deck is None:
            logging.warning('Skipping this URL because something went wrong. (' + submission.title.encode('utf-8') + ')')
            rds.sadd('SEEN', submission.id)
            continue
        # Go get the recommendations
        newrecs, outrecs = core.recommend(deck)
        lands = []
        creatures = []
        noncreatures = []
        for card, score in newrecs:
            # filter out basic lands from being recommendations
            if card in ['swamp', 'island', 'plains', 'mountain', 'forest']:
                continue
            # there is an annoying thing that happens when people use snow-covered basics
            # where edhrec will post basic lands as a recommendation. this prevents that
            if score < .3:
                continue
            score = int(score * 100)  # make score easier to read
            try:
                types = core.lookup_card(card)['types']
            except Exception:
                logging.warn('something went wrong with the card %s, ignoring it' % card)
                continue
            if 'Creature' in types:
                creatures.append((card, score))
            elif 'Land' in types:
                lands.append((card, score))
            else:
                noncreatures.append((card, score))
        # build the output string
        if str(submission.subreddit).lower() in ['edhrec', 'edh']:
            out_str = ['Other decks like yours use:\n\nCreatures | Non-creatures | Lands | Unique in your deck\n:--------|:---------|:---------|:--------']
            for i in range(16):
                try:
                    c = '[%d] %s ' % (creatures[i][1], linkify(creatures[i][0]))
                except IndexError:
                    c = ' '
                try:
                    n = '[%d] %s ' % (noncreatures[i][1], linkify(noncreatures[i][0]))
                except IndexError:
                    n = ' '
                try:
                    l = '[%d] %s ' % (lands[i][1], linkify(lands[i][0]))
                except IndexError:
                    l = ' '
                try:
                    u = '%s ' % linkify(outrecs[i][0])
                except IndexError:
                    u = ' '
                if len(c + n + l) == 3:
                    break
                out_str.append('%s | %s | %s | %s' % (c, n, l, u))
            out_str.append(BOT_NOTICE)
        elif str(submission.subreddit).lower() == 'competitiveedh':
            pass
        # Post the comment!
        if not TESTING:
            submission.add_comment('\n'.join(out_str))
            logging.debug('comment I think I posted:\n' + '\n'.join(out_str))
            logging.debug("I posted a comment with recommendations!")
        # Keep track of the fact that I've now processed this deck.
        # It is important that this is last in case the scraping fails and
        # the problem is later fixed.
        if not TESTING:
            rds.sadd('SEEN', submission.id)
        core.add_deck(deck)
        sleep()
    logging.debug('DONE WITH SUBMISSION SEEK AT ' + str(datetime.datetime.now()))

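# find_tappedout_url is referenced above but not shown. A minimal sketch of
# what it might do, assuming old-PRAW submissions expose .url and .selftext;
# the regex is an assumption, not the project's actual pattern:
import re

TAPPEDOUT_RE = re.compile(r'https?://(?:www\.)?tappedout\.net/mtg-decks/[\w-]+')

def find_tappedout_url(submission):
    for text in (submission.url or '', submission.selftext or ''):
        m = TAPPEDOUT_RE.search(text)
        if m:
            return m.group(0)
    return None
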
import core

r = core.get_redis()
c = 0
for k in r.keys('CACHE_*'):
    print('DEL %s' % k)
    c += 1
    r.delete(k)
print('deleted %d keys.' % c)

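# KEYS blocks Redis while it walks the whole keyspace; on a large instance the
# incremental SCAN iterator is safer. The same cleanup using redis-py's
# scan_iter (a sketch; behavior is otherwise identical to the script above):
import core

r = core.get_redis()
c = 0
for k in r.scan_iter('CACHE_*'):
    r.delete(k)
    c += 1
print('deleted %d keys.' % c)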