def get_wrassler(nr):
    """Return the Wrestler whose ``nr`` matches, creating it when missing.

    A freshly created Wrestler is added to the session; nothing is
    committed here.
    """
    lookup = session.query(Wrestler).filter_by(nr=nr)
    try:
        return lookup.one()
    except exc.NoResultFound:
        # Fall through and create the row below.
        pass

    created = Wrestler(nr=nr)
    session.add(created)
    return created
def get_wrestler_score(nr):
    """Return the latest score of wrestler ``nr``, memoised in SCORE_CACHE.

    Lookup order:
      1. a value already stored in ``SCORE_CACHE``;
      2. the Score joined to the Match with the newest ``Match.date``;
      3. ``BASE_SCORE`` when the wrestler has no Score rows at all.

    Whatever is found is written to ``SCORE_CACHE`` before returning.
    """
    if nr in SCORE_CACHE:
        return SCORE_CACHE[nr]

    # first() fetches at most one row and yields None when there is none,
    # so a separate count() round trip is not needed.
    latest = (session.query(Score)
              .join(Match)
              .filter(Score.wrestler_nr == nr)
              .order_by(desc(Match.date))
              .first())

    if latest is not None:
        SCORE_CACHE[nr] = latest.score
        return SCORE_CACHE[nr]

    # No score yet: log at INFO for wrestlers that exist in the table,
    # at DEBUG for numbers without a Wrestler row ("jobbers").
    not_jobber = session.query(Wrestler).get(nr)
    if not_jobber:
        logging.info(
            'No score for wrestler: %s[%s], returning base score of %s',
            not_jobber.name, nr, BASE_SCORE)
    else:
        logging.debug(
            'No score for jobber [%s], returning base score of %s',
            nr, BASE_SCORE)
    SCORE_CACHE[nr] = BASE_SCORE
    return SCORE_CACHE[nr]
def _gimmick(self, gimmick: str, wrestler: Wrestler) -> Gimmick:
    '''Return gimmick model. Create one if necessary.

    Looks up the Gimmick row matching both the gimmick name and
    ``wrestler.nr``. When none exists, a new Gimmick is created, appended
    to ``wrestler.gimmicks`` and added to the session (not committed).
    '''
    logger.debug("Looking for gimmick %s", gimmick)
    try:
        gmk = session.query(Gimmick).filter_by(
            gimmick=gimmick, wrestler_nr=wrestler.nr).one()
    except exc.NoResultFound:
        logger.debug('Creating new gimmick "%s"', gimmick)
        gmk = Gimmick(gimmick=gimmick, wrestler_nr=wrestler.nr)
        wrestler.gimmicks.append(gmk)
        session.add(gmk)
    return gmk
def get_riser_stuff(wrestler):
    """Return up to five matches with the biggest relative score jump.

    The wrestler's scores are walked in ascending ``Match.date`` order.
    Each score is divided by the one before it (``BASE_SCORE`` for the very
    first), so the ranking key is a ratio rather than a difference. The
    matches belonging to the five largest ratios are returned, largest
    first.
    """
    history = (session.query(Score)
               .filter_by(wrestler_nr=wrestler.nr)
               .join(Match)
               .order_by(asc(Match.date)))

    ranked = []
    previous = BASE_SCORE
    for entry in history:
        ratio = entry.score / previous
        previous = entry.score
        ranked.append((ratio, entry.match))

    # Stable descending sort on the ratio only, then keep the best five.
    ranked.sort(key=lambda pair: pair[0], reverse=True)
    return [match for _ratio, match in ranked[:5]]
def get_wrestler_score(nr):
    """Return the latest score of wrestler ``nr``, memoised in SCORE_CACHE.

    Lookup order:
      1. a value already stored in ``SCORE_CACHE``;
      2. the Score joined to the Match with the newest ``Match.date``;
      3. ``BASE_SCORE`` when the wrestler has no Score rows at all.

    Whatever is found is written to ``SCORE_CACHE`` before returning.
    """
    if nr in SCORE_CACHE:
        return SCORE_CACHE[nr]

    # first() fetches at most one row and yields None when there is none,
    # so a separate count() round trip is not needed.
    latest = (session.query(Score)
              .join(Match)
              .filter(Score.wrestler_nr == nr)
              .order_by(desc(Match.date))
              .first())

    if latest is not None:
        SCORE_CACHE[nr] = latest.score
        return SCORE_CACHE[nr]

    # No score yet: log at INFO for wrestlers that exist in the table,
    # at DEBUG for numbers without a Wrestler row ("jobbers").
    not_jobber = session.query(Wrestler).get(nr)
    if not_jobber:
        logging.info(
            'No score for wrestler: %s[%s], returning base score of %s',
            not_jobber.name, nr, BASE_SCORE)
    else:
        logging.debug(
            'No score for jobber [%s], returning base score of %s',
            nr, BASE_SCORE)
    SCORE_CACHE[nr] = BASE_SCORE
    return SCORE_CACHE[nr]
def _get_promotion(self, promotion_id):
    ''' Get promotion. Create new if necessary

    The promotion is looked up by primary key. When it is not stored yet,
    its data is requested from ``cagematchnet.promotion`` and a new
    Promotion is added to the session (not committed here).
    '''
    # Query.get() returns None for a missing row instead of raising
    # NoResultFound, so no exception handling is needed around it.
    promotion = session.query(Promotion).get(promotion_id)
    if not promotion:
        promotion_data = cagematchnet.promotion(promotion_id)
        promotion = Promotion(cm_id=promotion_id,
                              name=promotion_data['name'],
                              abbrevation=promotion_data['abbrevation'])
        logger.debug("Creating new Promotion: %s", promotion)
        session.add(promotion)
    return promotion
else:
    # Branch of an `if` that starts above this excerpt: log at INFO level.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
logger = logging.getLogger(__name__)
force_update = args.update
# Collect the Wrestler rows to work on, either by explicit ids or by name.
wrestlers = []
if args.wrestler_id:
    # Explicit ids: fetch each wrestler, creating missing rows.
    for nr in args.wrestler_id:
        wrestlers.append(get_wrassler(nr))
elif args.name:
    # Name given: try the local database first.
    try:
        wrestler = session.query(Wrestler).filter_by(name=args.name).one()
    except exc.NoResultFound:
        wrestler = None
    if wrestler:
        wrestlers.append(wrestler)
    else:
        # Not stored locally: fall back to a cagematchnet search.
        _wrestlers = cagematchnet.search(args.name.strip())
        if len(_wrestlers) == 1:
            # Exactly one hit: its key is used as the wrestler number.
            w_nr = list(_wrestlers.keys())[0]
            wrestlers.append(get_wrassler(w_nr))
            logger.info('Found Wrestler %s from cagematch.net', w_nr)
        else:
            # Zero or several hits cannot be resolved; exit with status 2.
            logging.error('Wrong ammount of results %i', len(_wrestlers))
            sys.exit(2)
# Tail of a function whose header is above this excerpt: store the new
# score in the session and remember it in the score cache.
session.add(new_score)
SCORE_CACHE[nr] = new_score.score
# Script entry point: walk all stored matches in chronological order.
if __name__ == '__main__':
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
    cm = CageMatch()
    '''
    matches = session.query(Match).join(MatchWrestler).join(Wrestler).\
            distinct(Match.id).order_by(Match.date, desc(Match.id))
    '''
    # Oldest match first; ties on the date are broken by ascending id.
    matches = session.query(Match).order_by(asc(Match.date), asc(Match.id))
            #filter(Wrestler.pwi >= 502).\
            #filter(MatchWrestler.wrestler_id.in_(test)).\
    matches_count = matches.count()
    # Counter shown in the progress line below.
    i = 1
    for match in matches.all():
        # Progress line: "# <index> / <total> : <date> : <match id>".
        print('#', i, '/', matches_count, ':', match.date,':', match.id)
        '''
        print('>>>', match.type, match.date, ':', match.event_name, '-', match.type_desc, '-', match.resolution)
        '''
        winners = []
def scrape_matches(wrestler_nr, match_offset=0, skip_processed=False):
    """Scrape one page of a wrestler's match list and store new matches.

    The page is read from a cache file under ``/tmp`` when one exists,
    otherwise it is downloaded through ``cm.get`` and written to that file.
    Every table row is parsed into event data, participants (winners and
    losers), related titles and a resolution. Rows that match an already
    stored Match (same event, type, date, resolution and participants) are
    skipped; the rest are added to the session. Nothing is committed here.

    Args:
        wrestler_nr: number of the wrestler whose matches are scraped.
        match_offset: offset sent as the ``s`` request parameter.
        skip_processed: when true and a cache file for this page already
            exists, return immediately without parsing it.

    Returns:
        None. When the page holds exactly 100 rows the function calls
        itself for ``match_offset + 100``.
    """
    print('Processing %d:%d' % (wrestler_nr, match_offset))
    c_file = '/tmp/cm-matches-{wrestler}-{offset}.txt'.format(
        wrestler=wrestler_nr, offset=match_offset)

    try:
        cached = open(c_file)
    except OSError:
        # No readable cache file: download the page and cache it.
        page = cm.get('http://www.cagematch.net/?id=2&page=4',
                      params={
                          'nr': wrestler_nr,
                          's': match_offset
                      }).text
        with open(c_file, 'w') as cache_out:
            cache_out.write(page)
    else:
        # Context manager makes sure the cache file handle is closed.
        with cached:
            if skip_processed:
                logging.debug("Skipping processed wresler")
                return
            page = cached.read()

    soup = BeautifulSoup(page)

    # First row is header
    rows = soup.select('div.TableContents tr')[1:]
    if not rows:
        print('Zero rows, whaaaaat?')
        return

    for row in rows:
        cols = row.select('td')

        event_id = 0
        event_name = ''
        event_date = None
        promotions = []
        titles = []
        match_type_desc = None
        winners = []
        losers = []
        by = None
        event_type = 'Event'
        # Participants are appended to `group`; it starts as the winners
        # list and is switched to losers once "defeat(s)" is seen.
        group = winners
        title_change = False
        winners_resolution = MatchWrestler.WINNER
        losers_resolution = MatchWrestler.LOSER

        event_date = date(
            *(strptime(cols[1].get_text().strip(), '%d.%m.%Y')[0:3]))

        for promotion in cols[2].select('a'):
            promotions.append(cm.id_from_url(promotion['href']))

        match_type = cols[3].find('span', class_='MatchType')
        if match_type:
            for cm_match_type in match_type.find_all('a'):
                title_nr = None
                try:
                    title_nr = cm.id_from_url(cm_match_type['href'])
                except Exception:
                    # Best effort: the link carries no usable id, so search
                    # for the title by name instead.
                    title_name = cm_match_type.get_text().strip()
                    try:
                        title_name = parse_qs(
                            cm_match_type['href'])['search'][0].strip()
                    except Exception:
                        # Keep the link text as the search term.
                        pass
                    try:
                        titles_search = cm.search_title(title_name)
                        print('Found titles:', titles_search)
                        if len(titles_search) == 1:
                            title_nr = titles_search[0]
                    except Exception:
                        logging.warning('Exception while searching title.')
                if title_nr:
                    print('related title title_nr:', title_nr)
                    titles.append(title_nr)
            match_type_desc = match_type.get_text().strip().strip(':')

        card = cols[3].find('span', class_='MatchCard')
        event_details = cols[3].select('div.MatchEventLine a')[0]
        event_id = cm.id_from_url(event_details['href'])
        event_name = cols[3].select('div.MatchEventLine')[0].get_text()

        match_detail = card.get_text()
        # NOTE(review): "(:?s*)" matches an optional ':' followed by any
        # number of 's'; "(?:s*)" may have been intended. Pattern kept as is.
        if not re.search(r'(^|\b|\s)defeat(:?s*)($|\b|\s)',
                         match_detail,
                         flags=re.IGNORECASE):
            # Nobody "defeats" anybody: treat the match as a no contest.
            by = 'No Contest'
            winners_resolution = MatchWrestler.NC
            losers_resolution = MatchWrestler.NC
            # Try detecting how
            nc_how = re.search(r' - (.*)( [\(\[][\d:]+[\)\]]|$)',
                               match_detail)
            if nc_how:
                by = nc_how.group(1).strip()
            logging.info("No contest: '%s' (%s)", match_detail, by)

        for element in card:
            if isinstance(element, NavigableString):
                by_match = re.search(r'(^|\b|\s)(by|-)\s+([\w\s]+)($|\b)',
                                     element,
                                     flags=re.IGNORECASE)
                if by_match:
                    by = by_match.group(3).strip()
                elif re.search(r'(^|\s|\b)defeat(:?s*)($|\s|\b)',
                               element,
                               flags=re.IGNORECASE):
                    # Everything after "defeat(s)" is on the losing side.
                    group = losers
                elif element.strip() not in [
                        'and', '[', ']', '(', ')', '&', ',', '-'
                ]:
                    logging.debug("NavigableString: '%s'", element)
            elif element.name == 'a':
                parts = parse_qs(element['href'])
                # Links with id=2 and an nr value are treated as wrestlers.
                if parts['?id'][0] == '2' and parts['nr']:
                    group.append(
                        (int(parts['nr'][0]), parts['name'][0].strip()))

        event_type = cols[4].get_text()

        if cols[3].select('.MatchTitleChange'):
            title_change = True

        print(event_id, event_name, event_date, promotions, event_type,
              match_type_desc, winners, 'vs.', losers, 'by', by, titles,
              '(%s)' % title_change)

        # Find existing matches. First retrieve an approximation of
        # suitable matches, then check whether one of them is the match
        # currently being processed.
        existing_match = False
        winner_list = [i[0] for i in winners]
        loser_list = [i[0] for i in losers]
        participiant_list = winner_list + loser_list
        event_matches = session.query(Match.id).filter_by(
            event_id=event_id,
            type=event_type,
            date=event_date,
            resolution=by).join(MatchWrestler).filter_by(
                wrestler_id=wrestler_nr).distinct('Match.id')

        # Loop through all participants and check for the correct
        # resolution.
        for (match_id, ) in event_matches.all():
            match_wrestlers = session.query(MatchWrestler).filter_by(
                match_id=match_id)
            if len(participiant_list) != match_wrestlers.count():
                print('DIFFERENT MATCH, DIFFERENT COUNT')
                continue
            existing_match = True
            for wrestler in match_wrestlers.all():
                # Resolutions >= NC must belong to the parsed winners,
                # resolutions < NC to the parsed losers.
                if (wrestler.resolution >= MatchWrestler.NC
                        and wrestler.wrestler_id not in winner_list):
                    existing_match = False
                    break
                elif (wrestler.resolution < MatchWrestler.NC
                        and wrestler.wrestler_id not in loser_list):
                    existing_match = False
                    break
            if existing_match:
                break

        if existing_match:
            logging.info("Match '%s' exists, skipping", event_name)
            print('!!! EXISTING MATCH')
            continue
        # End of existing match checks.

        match = Match(event_id=event_id,
                      event_name=event_name,
                      date=event_date,
                      type=event_type,
                      type_desc=match_type_desc)
        if by:
            match.resolution = by

        session.add(match)

        for (wrestler_id, gimmick) in winners:
            match.wrestlers.append(
                MatchWrestler(wrestler_id=wrestler_id,
                              gimmick_id=get_gimmick_id(wrestler_id, gimmick),
                              resolution=winners_resolution))

        for (wrestler_id, gimmick) in losers:
            match.wrestlers.append(
                MatchWrestler(wrestler_id=wrestler_id,
                              gimmick_id=get_gimmick_id(wrestler_id, gimmick),
                              resolution=losers_resolution))

        # Promotion for match
        for promotion in promotions:
            match.promotions.append(MatchPromotion(promotion_id=promotion))

        for title in titles:
            match.titles.append(MatchTitle(title_id=title,
                                           change=title_change))

        ## TODO: Add event?

    # Exactly 100 rows means another page is requested at the next offset.
    # NOTE(review): skip_processed is not forwarded, so follow-up pages are
    # always parsed; confirm this is intended.
    if len(rows) == 100:
        return scrape_matches(wrestler_nr, match_offset + 100)
    return
action='store_true')
args = cmdline.parse_args()
if args.debug:
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
logger = logging.getLogger(__name__)
# Cached pages are skipped unless a full run was requested.
skip_processed = not args.full
cm = cagematchnet  # HACK
# Pick the wrestlers to scrape: the requested ids, or everyone with a page.
if len(args.wrestler_id):
    workers = session.query(Wrestler).filter(
        Wrestler.nr.in_(args.wrestler_id))
else:
    # Wrestlers over 1000000 have no cagematch page.
    workers = session.query(Wrestler).filter(
        Wrestler.nr < 1000000).order_by(Wrestler.nr)
if workers.count() == 0:
    # Nothing to do: report and exit with status 128.
    logging.error('No wrestler found.')
    sys.exit(128)
# NOTE(review): `i` is not incremented in the visible part of the loop, so
# the printed index stays at 1 unless it is advanced further down.
i = 1
for worker in workers.all():
    print('Processing wrestler #', i, worker.name)
    scrape_matches(worker.nr, skip_processed=skip_processed)
translations = gettext.translation('bookstrong', localedir='ass/locale', languages=['ja']) translations.install() tpl = Environment(loader=FileSystemLoader('tpl'), extensions=['jinja2.ext.i18n']) tpl.install_gettext_translations(translations) tpl.filters['timetag'] = format_datetime tpl.filters['img'] = get_image_path wd = WikiData() to_date = date.today() prev_date = date(to_date.year, to_date.month-1, 1) promotions = session.query(Promotion).join(Wrestler).filter(Wrestler.nr != None).all() ranking = Ranking(limit=40) # Find highest riser, and worst max_r_risen = max_s_risen = -10000 max_r_dropped = 0 score_riser = rank_riser = rank_dropper = None for w in ranking: get_face(w) rank = ranking.get_rank(w) prev_rank = ranking.get_previous_rank(w) if prev_rank is None or rank is None:
# Log to stderr; DEBUG when requested on the command line, INFO otherwise.
if args.debug:
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
else:
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
logger = logging.getLogger()
# Fixed: this previously referenced the undefined name `lgger`.
debugging = logger.isEnabledFor(logging.DEBUG)
# Earlier query variant, kept for reference:
# matches = session.query(Match).join(MatchWrestler).join(Wrestler).\
#     distinct(Match.id).order_by(Match.date, desc(Match.id))
# Matches ordered by ascending date, then descending id.
matches = session.query(Match).distinct(Match.id).order_by(
    asc(Match.date), desc(Match.id))
if not args.full:
    # Incremental run: only matches with an id above the match of the most
    # recently created Score. one() raises when no Score exists yet.
    last = session.query(Score).order_by(desc(Score.id)).limit(1).one()
    matches = matches.filter(Match.id > last.match.id)
matches_count = matches.count()
# Counter shown in the progress line below.
i = 1
for match in matches.all():
    print('#', i, '/', matches_count, ':', match.date, ':', match.id)
    # Earlier debug output, kept for reference:
    # print('>>>', match.type, match.date, ':', match.event_name, '-',
    #       match.type_desc, '-', match.resolution)