def __setitem__(self, index, value):
    """Store ``value`` in the pair slot addressed by ``index``.

    Index 0 (or -2) writes ``self.x``; index 1 (or -1) writes ``self.y``.

    Raises:
        co.TennisError: for any other index.
    """
    if index in (0, -2):
        self.x = value
        return
    if index in (1, -1):
        self.y = value
        return
    raise co.TennisError("unexpected Pair index {}".format(index))
def fire(self):
    """Run the configured command and return its result.

    When ``self.external`` is true, ``self.cmd`` and ``self.args`` are joined
    into one command line and run through the shell with ``CMD_ENVIRON`` as the
    environment and ``self.cwd`` as the working directory. If
    ``self.read_console_out`` is set, stdout is captured into
    ``self.console_out``. The result is the process exit code.

    Otherwise ``self.cmd`` is called as a Python callable: a list/tuple
    ``self.args`` is passed positionally, a dict is passed as keyword
    arguments. Subclasses of these types (e.g. namedtuple, OrderedDict) are
    accepted as well.

    The result is also stored in ``self.returned_value``.

    Raises:
        co.TennisError: for an in-process call whose ``self.args`` is neither
            a list/tuple nor a dict.
    """
    self.returned_value = None
    if self.external:
        # NOTE(review): the command line is built by string formatting and run
        # with shell=True, so cmd/args must come from a trusted source.
        command_line = "%s %s" % (self.cmd, self.args)
        if self.read_console_out:
            popen = subprocess.Popen(
                command_line,
                shell=True,
                stdout=subprocess.PIPE,
                env=CMD_ENVIRON,
                cwd=self.cwd,
            )
            self.console_out = popen.communicate()[0]
            self.returned_value = popen.wait()
        else:
            self.returned_value = subprocess.call(
                command_line,
                shell=True,
                env=CMD_ENVIRON,
                cwd=self.cwd,
            )
        return self.returned_value

    # isinstance (not an exact type comparison) so subclasses are dispatched too
    if isinstance(self.args, (list, tuple)):
        self.returned_value = self.cmd(*self.args)
    elif isinstance(self.args, dict):
        self.returned_value = self.cmd(**self.args)
    else:
        raise co.TennisError("unsupported args type: %s" % type(self.args))
    return self.returned_value
def load(filename, createfun=None, keyfun=None, valuefun=None, filterfun=None):
    """
    restore from file
    Each key-value pair is restored from one separate line.
    Lines starting with '#' are skipped.
    filterfun is applied after keyfun, valuefun to key, value
    and skip item if False.

    :param filename: path of the file to read; if it is not an existing file
        the (empty) container is returned as is.
    :param createfun: factory for the result container (a plain dict if None).
    :param keyfun: converts the key text to a key (builtin eval if None).
    :param valuefun: converts the value text to a value (builtin eval if None).
    :param filterfun: optional predicate filterfun(key, value).
    :returns: the filled container.
    :raises co.TennisError: if a non-comment line is not matched by load_line_re.
    """
    # NOTE(review): these names are not used directly below; presumably they are
    # imported here so that the default eval loaders can resolve them - confirm.
    from common import Struct, StructKey
    from report_line import SizedValue, ReportLine
    from stat_cont import Sumator, WinLoss
    from tennis import Round, Surface, Level
    from score import Score

    # NOTE(review): the default loaders eval() text read from the file, so the
    # file content must be trusted.
    keyloadfun = keyfun if keyfun is not None else eval
    valueloadfun = valuefun if valuefun is not None else eval
    result = createfun() if createfun is not None else {}
    if os.path.isfile(filename):
        with open(filename, "r") as fh:
            for line in fh.readlines():
                if line.startswith("#"):
                    continue
                match = load_line_re.match(line)
                if match:
                    key = keyloadfun(match.group("key").strip())
                    value = valueloadfun(match.group("value").strip())
                    if filterfun is not None and not filterfun(key, value):
                        continue
                    result[key] = value
                else:
                    raise co.TennisError(
                        "unparsed dict line: '{}'".format(line))
    return result
def history_days_personal(sex):
    """Return the history depth in days used when reading prepared files.

    Both supported tours ("atp" and "wta") use the same six-year depth.

    Raises:
        co.TennisError: if ``sex`` is neither "atp" nor "wta".
    """
    six_years_in_days = int(365 * 6)
    if sex in ("atp", "wta"):
        return six_years_in_days
    raise co.TennisError("bad sex {}".format(sex))
def __getitem__(self, index):
    """Return the pair member addressed by ``index``.

    Index 0 (or -2) yields ``self.x``; index 1 (or -1) yields ``self.y``.

    Raises:
        co.TennisError: for any other index.
    """
    if index in (0, -2):
        return self.x
    if index in (1, -1):
        return self.y
    raise co.TennisError("unexpected Pair index {}".format(index))
def create_from_text(text):
    """Build a ``SizedValue`` from its textual form.

    ``text`` is matched against ``SizedValue.line_re``; the ``value`` group is
    converted to float and the ``size`` group to int.

    Raises:
        co.TennisError: if ``text`` does not match.
    """
    parsed = SizedValue.line_re.match(text)
    if not parsed:
        raise co.TennisError(
            "SizedValue not created from text: '{}'".format(text))
    return SizedValue(float(parsed.group("value")), int(parsed.group("size")))
def max_where_t(num_ingame):
    """Return ``(max_value, position)`` for a two-item sequence.

    ``position`` is 0 when the first item is strictly greater and 1 when the
    second one is.

    Raises:
        co.TennisError: if neither item is strictly greater than the other.
    """
    first, second = num_ingame[0], num_ingame[1]
    if first > second:
        return first, 0
    if first < second:
        return second, 1
    raise co.TennisError(
        "bad num_ingame {} max_where_t".format(num_ingame))
def load_variants(variants: Optional[List[cco.Variant]] = None, metric_name: Optional[str] = "Accuracy"):
    """Load the classifier of every variant.

    Iterates ``variants`` (or the module-level ``apply_variants`` when
    ``variants`` is None), calling ``load_clf(MODEL, metric_name=...)`` on each.

    Raises:
        co.TennisError: as soon as a variant has ``clf`` equal to None after
            loading.
    """
    targets = variants
    if targets is None:
        targets = apply_variants
    for variant in targets:
        variant.load_clf(MODEL, metric_name=metric_name)
        if variant.clf is not None:
            continue
        raise co.TennisError(
            f"fail cb model load {variant.sex} key {variant.key} "
            f"metric_name {metric_name}")
def create_from_text(text):
    """Build a ``WinLoss`` from its textual form.

    ``text`` is matched against ``WinLoss.text_re``. When the ``percent_sign``
    group is present the parsed value is scaled by 0.01 before being passed,
    together with the integer ``size`` group, to ``WinLoss.create_from_ratio``.

    Raises:
        co.TennisError: if ``text`` does not match.
    """
    parsed = WinLoss.text_re.match(text)
    if not parsed:
        raise co.TennisError("unparsed WinLoss text: '{}'".format(text))
    if parsed.group("percent_sign"):
        scale = 0.01
    else:
        scale = 1.0
    ratio = float(parsed.group("value")) * scale
    return WinLoss.create_from_ratio(ratio, int(parsed.group("size")))
def _make_driver(browser, headless):
    """Create a web driver for the requested ``browser``.

    ``headless`` is forwarded to the Opera, Firefox and Chrome factories; the
    PhantomJS factory takes no arguments.

    Raises:
        co.TennisError: if ``browser`` is not one of the supported values.
    """
    if browser == BROWSER.OPERA:
        return _make_driver_opera(headless)
    if browser == BROWSER.FIREFOX:
        return _make_driver_firefox(headless)
    if browser == BROWSER.CHROME:
        return _make_driver_chrome(headless)
    if browser == BROWSER.PHANTOMJS:
        return _make_driver_phantomjs()
    raise co.TennisError("unknown browser: {}".format(browser))
def with_nan_columns(df, columns=None, raise_ifnan=False):
    """Return the names (as str) of the columns that contain null values.

    Only ``columns`` are inspected when given (and non-empty); otherwise all
    columns of ``df`` are. When something is found, the message is reported via
    ``out`` and, if ``raise_ifnan`` is set, raised as ``co.TennisError``.
    """
    candidates = columns if columns else df.columns
    nan_names = [str(name) for name in candidates if df[name].isnull().any()]
    if not nan_names:
        return nan_names
    err_text = "detected columns with nan: {}".format(nan_names)
    out(err_text)
    if raise_ifnan:
        raise co.TennisError(err_text)
    return nan_names
def create_from_text(text, struct_key=True):
    """Build a ``ReportLine`` from its textual form.

    ``text`` is matched against ``ReportLine.line_re``. The ``key`` group is
    converted with ``co.StructKey.create_from_text`` when ``struct_key`` is
    true and kept as plain text otherwise; ``value`` becomes float and ``size``
    becomes int.

    Raises:
        co.TennisError: if ``text`` does not match.
    """
    parsed = ReportLine.line_re.match(text)
    if not parsed:
        raise co.TennisError("ReportLine not created when struct_key={} "
                             "from text: '{}'".format(struct_key, text))
    raw_key = parsed.group("key")
    key = co.StructKey.create_from_text(raw_key) if struct_key else raw_key
    value = float(parsed.group("value"))
    size = int(parsed.group("size"))
    return ReportLine(key, value, size)
def __eq__(self, other):
    """Fuzzy equality against another ``SyllabledName`` or a plain string.

    * ``SyllabledName``: equal when ``syllabled`` values match, otherwise the
      result of ``self.resemble(other)``.
    * ``str``: equal when it matches ``self.name`` exactly, otherwise the result
      of ``resemble_words`` with at most one error, case-sensitive.

    Raises:
        co.TennisError: for any other operand type.
    """
    if isinstance(other, SyllabledName):
        return True if self.syllabled == other.syllabled else self.resemble(other)
    if isinstance(other, str):
        if self.name == other:
            return True
        return resemble_words(self.name, other, max_errors=1, ignore_case=False)
    raise co.TennisError("bad compare {} and {}".format(
        self.__class__.__name__, type(other)))
def proc_set(self, setitems: SetItems):
    """Record for every score in ``self._exist_scores`` whether it occurred.

    Level scores (x == y) are queried as is with the set opener's side.
    Scores differing by one are queried from the set opener's point of view
    with ``left_opener`` inverted. The answer is stored in
    ``self.result_dct[(x, y)]`` as -1 (unknown, ``None`` answer), 0 or 1.

    Raises:
        co.TennisError: for a score pair differing by more than one.
    """
    opener_is_left = setitems.set_opener_side() == co.LEFT
    for x, y in self._exist_scores:
        if x == y:
            query_scr = (x, y)
            query_left_opener = opener_is_left
        elif abs(x - y) == 1:
            # adapt to real set-opener side
            query_scr = (x, y) if opener_is_left else (y, x)
            # set-opener receive
            query_left_opener = not opener_is_left
        else:
            raise co.TennisError(
                "_score_exist_dict bad scr ex {}, {}".format(x, y))
        is_scr = setitems.exist_scr(query_scr, left_opener=query_left_opener)
        self.result_dct[(x, y)] = -1 if is_scr is None else int(is_scr)
def tour_events_put_db(tour_events):
    """Insert fully identified matches with a detailed score into the db.

    Events without ``tour_id`` are skipped, as are matches lacking round,
    date, either player (or player ident) or a detailed score. Records are
    stored with the winner on the left side.

    Returns:
        ``(wta_count, atp_count)`` - number of inserted matches per tour type.

    Raises:
        co.TennisError: if an event's ``sex`` is neither "wta" nor "atp".
    """
    wta_inserted = 0
    atp_inserted = 0
    for tour_evt in tour_events:
        if tour_evt.tour_id is None:
            continue
        for match in tour_evt.matches:
            if (
                match.rnd is None
                or match.date is None
                or match.first_player is None
                or match.first_player.ident is None
                or match.second_player is None
                or match.second_player.ident is None
                or not hasattr(match, "detailed_score")
                or match.detailed_score is None
            ):
                continue

            left_id = match.first_player.ident
            right_id = match.second_player.ident
            det_score = match.detailed_score
            sets_score = match.score.sets_score(full=True)
            if sets_score[0] < sets_score[1]:
                # in db left should be winner
                left_id, right_id = right_id, left_id
                det_score = detailed_score_side_reversed(det_score)
            do_rain_interrupt_match(match)

            mrec = MatchRec(
                date=match.date,
                tour_id=tour_evt.tour_id,
                rnd=match.rnd,
                left_id=left_id,
                right_id=right_id,
                detailed_score=det_score,
                score=match.score,
            )
            if tour_evt.sex == "wta":
                dbdet_wta.insert_obj(mrec)
            elif tour_evt.sex == "atp":
                dbdet_atp.insert_obj(mrec)
            else:
                raise co.TennisError(f"sexless tour_evt {tour_evt}")
            log.info("inserted {}\n".format(match_to_string(match)))

            if tour_evt.sex == "wta":
                wta_inserted += 1
            else:
                atp_inserted += 1
    return wta_inserted, atp_inserted
def get_events(
    webpage,
    skip_levels,
    match_status=MatchStatus.live,
    company_name="FS",
    target_date=None,
):
    """Build tour events from ``webpage`` for the given data provider.

    Only ``company_name == "FS"`` is supported; the call is delegated to
    ``flashscore.make_events`` with the remaining arguments.

    Raises:
        co.TennisError: for any other ``company_name``.
    """
    from flashscore import make_events

    if company_name != "FS":
        raise co.TennisError("unsupported company_name: '{}'".format(company_name))
    return make_events(
        webpage,
        skip_levels=skip_levels,
        match_status=match_status,
        target_date=target_date,
    )
def identify_player(company_name, sex, player_short_name, cou=None):
    """Find the player whose provider display name is ``player_short_name``.

    The first player from ``oncourt_players.players(sex)`` whose
    ``disp_name("flashscore")`` equals the short name (and whose ``cou`` equals
    ``cou`` when it is given) is returned. If there is none, the lookup falls
    back to ``AbbrName(player_short_name).find_player``.

    Raises:
        co.TennisError: if ``company_name`` is not "FS".
    """
    if company_name != "FS":
        raise co.TennisError(
            "unexpected company_name '{}'".format(company_name))
    company_key = "flashscore"

    def has_short_name(plr):
        return plr.disp_name(company_key) == player_short_name

    def has_country_and_short_name(plr):
        return plr.cou == cou and has_short_name(plr)

    predicate = has_short_name if cou is None else has_country_and_short_name
    player = co.find_first(oncourt_players.players(sex), predicate)
    if player is None:
        return AbbrName(player_short_name).find_player(
            oncourt_players.players(sex), sex)
    return player
def srv_win_loss(self, side, quadrant=None, setnum=None):
    """Return serve win/loss data for ``side``, optionally for one quadrant.

    The four values from ``self.get_all(setnum=setnum)`` are taken as
    (first deuce, first adv, second deuce, second adv). For a left or right
    side the deuce value, the adv value or their sum (``quadrant`` None) is
    returned. For any other side a ``(left_sum, right_sum)`` tuple is returned
    and ``quadrant`` is ignored.

    Raises:
        co.TennisError: for an unknown ``quadrant``.
    """
    left_deuce, left_adv, right_deuce, right_adv = self.get_all(setnum=setnum)
    if side.is_left():
        deuce_wl, adv_wl = left_deuce, left_adv
    elif side.is_right():
        deuce_wl, adv_wl = right_deuce, right_adv
    else:
        # return (left_result, right_result) for both
        return left_deuce + left_adv, right_deuce + right_adv

    if quadrant is None:
        return deuce_wl + adv_wl
    if quadrant == co.DEUCE:
        return deuce_wl
    if quadrant == co.ADV:
        return adv_wl
    raise co.TennisError(
        "invalid quad {} in srv_win_loss".format(quadrant))
def create_sets_coefs(coefs):
    """Build a ``SetsCoefs`` from a flat sequence of coefficients.

    Four coefficients map, in order, to the set scores 2:0, 2:1, 1:2, 0:2;
    six coefficients map to 3:0, 3:1, 3:2, 2:3, 1:3, 0:3. Each coefficient is
    converted to float.

    Returns:
        ``None`` if any coefficient is falsy, otherwise the ``SetsCoefs``.

    Raises:
        co.TennisError: if the number of coefficients is neither 4 nor 6.
    """
    if not all(coefs):
        return None
    if len(coefs) == 4:
        outcomes = ((2, 0), (2, 1), (1, 2), (0, 2))
    elif len(coefs) == 6:
        outcomes = ((3, 0), (3, 1), (3, 2), (2, 3), (1, 3), (0, 3))
    else:
        raise co.TennisError("can not create SetsCoefs from '{}'".format(coefs))
    return SetsCoefs(
        {outcome: float(coefs[pos]) for pos, outcome in enumerate(outcomes)})
def __init__(self, filename=None, struct_key=True, eval_key=False, items=None):
    """
    Ways to construct:
    1) filename, struct_key (optionally eval_key) - lines are read from the
       file, '#' comment lines are skipped; a missing file gives an empty list
    2) a sequence of items - a shallow copy of it is stored
    3) no arguments - the list will be empty.
    """
    if items:
        self._report_lines = items[:]
        assert not filename, "ReportLineList init unexpected file: '{}'".format(
            filename)
        return

    self._report_lines = []
    if not filename:
        return

    assert not (
        struct_key and eval_key
    ), "struct_key and eval_key are not compatible when init"
    assert not items, "ReportLineList init unexpected items: '{}'".format(
        items)
    if not os.path.isfile(filename):
        return
    try:
        with open(filename, "r") as fhandle:
            for line in fhandle:
                if line.startswith("#"):
                    continue
                rpt_line = ReportLine.create_from_text(line, struct_key)
                if eval_key:
                    # NOTE(review): eval of text read from the file - the
                    # file content must be trusted.
                    rpt_line.key = eval(rpt_line.key)
                self._report_lines.append(rpt_line)
    except Exception as err:
        raise co.TennisError(
            "{} - ReportLineList init failed at file: '{}'".format(
                err, filename))
def goto_date(fsdrv, days_ago, start_date, wait_sec=5):
    """
    goto days_ago into past from start_date (today if start_date is None).
    if days_ago > 0 then go backward one day per click,
    if days_ago < 0 then go forward (e.g. -1 means +1 day).

    After clicking, the page is parsed and the date it shows is compared with
    the expected one.

    :param fsdrv: driver object; its implicitly_wait() and page() are used.
    :param days_ago: number of days to step (sign selects the direction).
    :param start_date: date the steps are counted from, or None for today.
    :param wait_sec: value passed to fsdrv.implicitly_wait().
    :returns: target_date if ok
    :raises co.TennisError: if the page shows a date other than the target.
    """

    def prev_day_button_coords():
        # y=695 with advertise. handy measure at Gennady notebook. y=585 without advertise
        return 1235, 670

    def next_day_button_coords():
        return 1235 + 184, 670

    def neighbour_day_click(is_backward):
        import automate2

        if is_backward:
            x, y = prev_day_button_coords()
        else:
            x, y = next_day_button_coords()
        automate2.press_button((x, y))
        fsdrv.implicitly_wait(wait_sec)
        time.sleep(5)

    # Honour the documented contract: a missing start_date means "today".
    if start_date is None:
        start_date = datetime.date.today()
    target_date = start_date - datetime.timedelta(days=days_ago)

    go_backward = days_ago >= 0  # direction is the same for every click
    for _ in range(abs(days_ago)):
        neighbour_day_click(is_backward=go_backward)

    fsdrv.implicitly_wait(wait_sec)
    parser = lxml.html.HTMLParser(encoding="utf8")
    tree = lxml.html.document_fromstring(fsdrv.page(), parser)
    cur_date = _make_current_date(tree)
    if cur_date != target_date:
        raise co.TennisError(
            "target_date {} != cur_date {} days_ago: {}".format(
                target_date, cur_date, days_ago))
    return cur_date
def fill_data_ending_chrono(
    df,
    split,
    feature_names,
    label_name,
    other_names,
    cat_features_idx,
    weight_mode=WeightMode.NO,
):
    """Split ``df`` via ``splited_by_year`` and pack the parts with ``make_data``.

    ``split`` selects which parts are produced:
      * ``True``  - train, eval and test;
      * ``False`` - train and test (eval is empty);
      * ``None``  - train only.
    When ``weight_mode`` is not ``WeightMode.NO`` the split is first processed
    by ``weighted_splited`` and the "weight" columns are passed as sample
    weights.

    Returns:
        ``(data, df_spl)`` - the ``make_data`` result and the split object.

    Raises:
        co.TennisError: if ``split`` is not True, False or None.
    """
    assert isinstance(other_names, list)
    df_spl = splited_by_year(df, split)
    use_weights = weight_mode != WeightMode.NO
    if use_weights:
        weighted_splited(df_spl, weight_mode, label_name)

    # Weight kwargs are passed only when weighting is on, so make_data's own
    # defaults apply otherwise.
    weights = {}
    if split is True:
        X_train, y_train = get_xy(df_spl.train, feature_names, label_name)
        X_test, y_test = get_xy(df_spl.test, feature_names, label_name)
        X_eval, y_eval = get_xy(df_spl.eval, feature_names, label_name)
        if use_weights:
            weights = {
                "w_train": df_spl.train["weight"].values,
                "w_eval": df_spl.eval["weight"].values,
                "w_test": df_spl.test["weight"].values,
            }
        data = make_data(
            cat_features_idx,
            X_train,
            X_eval,
            X_test,
            y_train,
            y_eval,
            y_test,
            **weights,
        )
    elif split is False:
        # eval is empty
        X_train, y_train = get_xy(df_spl.train, feature_names, label_name)
        X_test, y_test = get_xy(df_spl.test, feature_names, label_name)
        if use_weights:
            weights = {
                "w_train": df_spl.train["weight"].values,
                "w_eval": None,
                "w_test": df_spl.test["weight"].values,
            }
        data = make_data(
            cat_features_idx,
            X_train=X_train,
            X_eval=None,
            X_test=X_test,
            y_train=y_train,
            y_eval=None,
            y_test=y_test,
            **weights,
        )
    elif split is None:
        X_train, y_train = get_xy(df_spl.train, feature_names, label_name)
        if use_weights:
            weights = {"w_train": df_spl.train["weight"].values}
        data = make_data(
            cat_features_idx,
            X_train=X_train,
            X_eval=None,
            X_test=None,
            y_train=y_train,
            y_eval=None,
            y_test=None,
            **weights,
        )
    else:
        raise co.TennisError("invalid split value {}".format(split))
    return data, df_spl
def __delitem__(self, index):
    """Always fail: items cannot be deleted from an immutable ReportLineList.

    Raises:
        co.TennisError: unconditionally.
    """
    message = "ReportLineList immutable del error"
    raise co.TennisError(message)
def __setitem__(self, index, value):
    """Always fail: items cannot be assigned in an immutable ReportLineList.

    Raises:
        co.TennisError: unconditionally.
    """
    message = "ReportLineList immutable set error"
    raise co.TennisError(message)
def fetch_main(mindaysago, maxdaysago, sex=None):
    """Fetch finished matches day by day and store them in the db.

    Days are processed from maxdaysago down to mindaysago. For each day the
    page is navigated to that date, tour events are built, identified, their
    detailed scores parsed and the matches inserted into the db.
    Processing stops early when show_errors() returns a true value.

    :param mindaysago: nearest day to process (days before today).
    :param maxdaysago: farthest day to process (days before today).
    :param sex: passed to fetch_initialize().
    :returns: 0
    :raises co.TennisError: if goto_date returns None for a day.
    """

    def is_prev_week(in_date):
        # with the current_week option nothing is treated as previous week
        if args.current_week:
            return False
        cur_monday = tt.past_monday_date(datetime.date.today())
        in_monday = tt.past_monday_date(in_date)
        # NOTE(review): '<=' is also true for a date inside the current week -
        # confirm this is intended.
        return in_monday <= cur_monday

    flashscore.deep_find_player_mode = True
    fsdrv = fetch_initialize(sex=sex, yearsnum=1.5)
    start_datetime = datetime.datetime.now()
    log.info(
        "started with daysago {}-{} current_week: {}".format(
            mindaysago, maxdaysago, args.current_week
        )
    )
    wta_tours = weeked_tours.all_tours("wta")
    atp_tours = weeked_tours.all_tours("atp")
    warn_dict = defaultdict(lambda: 0)  # day -> n_warns
    wta_cnt, atp_cnt = 0, 0
    is_stop = False
    # oldest day first
    for daysago in reversed(range(mindaysago, maxdaysago + 1)):
        date = datetime.date.today() - datetime.timedelta(days=daysago)
        flashscore.initialize(prev_week=is_prev_week(date))
        target_date = flashscore.goto_date(fsdrv, daysago, start_datetime.date())
        if target_date is None:
            raise co.TennisError("fail goto target_date for daysago {}".format(daysago))
        tour_events = flashscore.make_events(
            fsdrv.page(),
            skip_levels=skip_levels_default(),
            match_status=MatchStatus.finished,
            target_date=target_date,
        )
        if len(tour_events) > 0:
            # identification warnings and score-parse errors are accumulated
            # per day and handed to show_errors below
            warn_cnt = trmt_misc.events_deep_ident(
                tour_events,
                wta_tours,
                atp_tours,
                from_scored=True,
                warnloghead="unk_id_tours for {}".format(date),
            )
            warn_dict[daysago] += warn_cnt
            err_cnt = trmt_misc.tour_events_parse_detailed_score(tour_events, fsdrv)
            warn_dict[daysago] += err_cnt
            day_wta_cnt, day_atp_cnt = tour_events_put_db(tour_events)
            log.info(
                "day {} db-inserted wta_cnt: {} atp_cnt: {}".format(
                    daysago, day_wta_cnt, day_atp_cnt
                )
            )
            is_stop = show_errors(tour_events, daysago, warn_dict[daysago])
            if is_stop:
                # the day's inserts are neither counted nor committed here
                break
            wta_cnt += day_wta_cnt
            atp_cnt += day_atp_cnt
            commit_sex("wta", daysago, day_wta_cnt)
            commit_sex("atp", daysago, day_atp_cnt)
        if daysago != mindaysago:
            fsdrv.goto_start()  # prepare for next goto date

    if not is_stop:
        log.info("all db-inserted wta_cnt: {} atp_cnt: {}".format(wta_cnt, atp_cnt))
    # NOTE(review): not reached if an exception is raised above - confirm the
    # driver does not need cleanup on that path.
    fetch_finalize(fsdrv)
    log.info(
        "{} finished within {}".format(
            __file__, str(datetime.datetime.now() - start_datetime)
        )
    )
    return 0
def to_str(self, match):
    """Placeholder that always fails; meant to be provided elsewhere.

    Raises:
        co.TennisError: unconditionally.
    """
    message = "forgoten implementation of to_str"
    raise co.TennisError(message)
def _initialize_results_sex(sex, max_rating, max_rating_dif, min_date=None, max_date=None):
    """Fill the module-level data_dict with results of matches read from the db.

    Rows are read from the Tours_/games_/Players_ tables of the given sex,
    excluding tournaments whose name contains 'juniors' and first players
    whose name contains '/', ordered by tournament date. For every accepted
    match the (set1_score, set2_score) entry under (sex, soft_level) and the
    match's year-week gets a hit() with the decided_win flag.

    :param sex: substituted into the table names (Tours_<sex> etc.).
    :param max_rating: passed to _get_match_data().
    :param max_rating_dif: passed to _get_match_data().
    :param min_date: passed to dba.sql_dates_condition().
    :param max_date: passed to dba.sql_dates_condition().
    :raises co.TennisScoreError: for a row without a date, or when the third
        set is won by the right side.
    :raises co.TennisError: for a non-int rank, or a None level / soft_level.
    """
    sql = """select tours.DATE_T, tours.NAME_T, tours.RANK_T, tours.PRIZE_T, games.ID_R_G, games.RESULT_G, games.ID1_G, games.ID2_G from Tours_{0} AS tours, games_{0} AS games, Players_{0} AS fst_plr where games.ID_T_G = tours.ID_T and games.ID1_G = fst_plr.ID_P and (tours.NAME_T Not Like '%juniors%') and (fst_plr.NAME_P Not Like '%/%') """.format(sex)
    sql += dba.sql_dates_condition(min_date, max_date)
    sql += " order by tours.DATE_T;"
    with closing(dba.get_connect().cursor()) as cursor:
        for (
            tour_dt,
            tour_name,
            db_rank,
            db_money,
            rnd_id,
            score_txt,
            fst_id,
            snd_id,
        ) in cursor.execute(sql):
            date = tour_dt.date() if tour_dt else None
            if date is None:
                raise co.TennisScoreError("none date {}".format(tour_name))
            if not score_txt:
                continue
            scr = sc.Score(score_txt)
            if scr.retired:
                continue
            # keep only scores with sets_count(full=True) == 3 that are not
            # best-of-five
            sets_count = scr.sets_count(full=True)
            if sets_count != 3 or scr.best_of_five():
                continue
            # the left side is expected to have won the third set
            set3_score = scr[2]
            if set3_score[0] < set3_score[1]:
                raise co.TennisScoreError(
                    "right winner unexpected {}".format(scr))
            money = oncourt_db.get_money(db_money)
            rank = None if db_rank is None else int(db_rank)
            # a None rank is logged first and then rejected by the check below
            if rank is None:
                log.error("none rank date: {} scr: {} name: {}".format(
                    date, scr, tour_name))
            if not isinstance(rank, int):
                raise co.TennisError(
                    "not int rank '{}' date: {} scr: {} name: {}".format(
                        rank, date, scr, tour_name))
            rawname, level = oncourt_db.get_name_level(sex, tour_name.strip(),
                                                       rank, money, date)
            if level in DISABLE_LEVELS:
                continue
            if level is None:
                raise co.TennisError(
                    "none level date: {} scr: {} name: {}".format(
                        date, scr, tour_name))
            rnd = tennis.Round.from_oncourt_id(rnd_id)
            soft_level = tennis.soft_level(level, rnd)
            if soft_level is None:
                raise co.TennisError(
                    "none soft_level date: {} scr: {} name: {}".format(
                        date, scr, tour_name))
            mdata = _get_match_data(sex, date, fst_id, snd_id, scr,
                                    max_rating, max_rating_dif)
            # matches for which _get_match_data gives None are not counted
            if mdata is not None:
                past_monday = tt.past_monday_date(date)
                ywn = tt.get_year_weeknum(past_monday)
                data_dict[(sex, soft_level)][ywn][(mdata.set1_score,
                                                   mdata.set2_score)].hit(
                    mdata.decided_win)
def split_tour_by_weeks(tour):
    """Split a tour into up to three per-week copies.

    Matches are distributed between the week before the tour's week, the
    tour's own week and the following week(s). Dated matches are placed by
    their date; undated ones by round_week_shift_by_struct(tour, rnd), and
    for the previous and current week they get an estimated date.

    :returns: list of deep copies of tour (previous week, current week, next
        week - only those that received matches), each with its own
        matches_from_rnd; the previous/next week copies also get the shifted
        date.
    :raises co.TennisError: if a match date is earlier than the previous week.
    """

    def get_qual_match_date_shifted(tour_date, match_date, rnd):
        # only undated qualification matches get an estimated date
        if match_date is not None or not rnd.qualification():
            return None
        # qualifying -> sunday, q-Second -> saturday, q-First -> friday
        if rnd == "Qualifying":
            return tour_date + datetime.timedelta(days=6)
        elif rnd == "q-Second":
            return tour_date + datetime.timedelta(days=5)
        elif rnd == "q-First":
            return tour_date + datetime.timedelta(days=4)

    def get_mdraw_match_date(tour_date, match_date, rnd):
        # only undated matches get an estimated date; rounds not listed
        # below give None
        if match_date is not None:
            return None
        # qualifying->monday, 1st->tuesday, 2nd->wednesday, 1/4->thursday, 1/2->friday
        if rnd == "Qualifying":
            return tour_date
        elif rnd == "First":
            return tour_date + datetime.timedelta(days=1)
        elif rnd == "Second":
            return tour_date + datetime.timedelta(days=2)
        elif rnd == "1/4":
            return tour_date + datetime.timedelta(days=3)
        elif rnd == "1/2":
            return tour_date + datetime.timedelta(days=4)
        elif rnd == "Final":
            return tour_date + datetime.timedelta(days=5)

    prevweek_matches_from_rnd = defaultdict(list)
    curweek_matches_from_rnd = defaultdict(list)
    nextweek_matches_from_rnd = defaultdict(list)
    curweek_date = tt.past_monday_date(tour.date)
    prevweek_date = curweek_date - datetime.timedelta(days=7)
    nextweek_date = curweek_date + datetime.timedelta(days=7)
    for rnd, matches in tour.matches_from_rnd.items():
        for m in matches:
            if m.date is not None:
                if prevweek_date <= m.date < curweek_date:
                    prevweek_matches_from_rnd[rnd].append(m)
                elif curweek_date <= m.date < nextweek_date:
                    curweek_matches_from_rnd[rnd].append(m)
                elif nextweek_date <= m.date:
                    nextweek_matches_from_rnd[rnd].append(m)
                else:
                    raise co.TennisError(
                        "match date {} is not binded with tour: {} match: {}".format(
                            m.date, str(tour), str(m)
                        )
                    )
            else:
                # undated match: the week is decided by the tour structure
                week_shift = round_week_shift_by_struct(tour, rnd)
                if week_shift == -1:
                    if rnd.qualification():
                        m.date = get_qual_match_date_shifted(prevweek_date, m.date, rnd)
                    prevweek_matches_from_rnd[rnd].append(m)
                elif week_shift == 0:
                    m.date = get_mdraw_match_date(curweek_date, m.date, rnd)
                    curweek_matches_from_rnd[rnd].append(m)
                else:
                    nextweek_matches_from_rnd[rnd].append(m)

    result = []
    if len(prevweek_matches_from_rnd) > 0:
        prevweek_tour = copy.deepcopy(tour)
        prevweek_tour.date = prevweek_date
        prevweek_tour.matches_from_rnd = prevweek_matches_from_rnd
        result.append(prevweek_tour)

    if len(curweek_matches_from_rnd) > 0:
        # may be empty if oncourt has not yet sent the main draw matches
        curweek_tour = copy.deepcopy(tour)
        curweek_tour.matches_from_rnd = curweek_matches_from_rnd
        result.append(curweek_tour)

    if len(nextweek_matches_from_rnd) > 0:
        nextweek_tour = copy.deepcopy(tour)
        nextweek_tour.date = nextweek_date
        nextweek_tour.matches_from_rnd = nextweek_matches_from_rnd
        result.append(nextweek_tour)
    return result
def level_code(level):
    """Return the code whose entry in ``_code_level`` equals ``str(level)``.

    The first matching code in the mapping's iteration order is returned.

    Raises:
        co.TennisError: if no entry matches.
    """
    wanted = str(level)
    for code in (key for key, name in _code_level.items() if wanted == name):
        return code
    raise co.TennisError("unexpected level: " + str(level))