def find_match_status(elem):
    """Determine the MatchStatus of a flashscore match-row element.

    Returns the matching MatchStatus member, or None when the textual
    status cannot be recognised (the failure is logged).
    """
    # Live matches carry a dedicated CSS class on the row.
    if element_has_class(elem, "event__match--live"):
        return MatchStatus.Live
    # Scheduled matches display their start time.
    try:
        elem.find_element_by_class_name("event__time")
        return MatchStatus.Scheduled
    except NoSuchElementException:
        pass
    # Otherwise read the textual stage block and map it to a status.
    status_str = elem.find_element_by_class_name("event__stage--block").text
    literal_statuses = {
        "Finished": MatchStatus.Finished,
        "Walkover": MatchStatus.Walkover,
        "Cancelled": MatchStatus.Cancelled,
    }
    if status_str in literal_statuses:
        return literal_statuses[status_str]
    # Retirement shows up as free text containing 'retired'.
    if "retired" in status_str:
        return MatchStatus.Retired
    msg = "Status '{0}' Not Found".format(status_str)
    log_to_file(msg, MATCHES_ERROR_LOGS)
    log("status", msg)
    return None
def match_and_notify_subscriber(self, message, addr):
    """Publish *message* exactly once.

    Notifies local subscribers, forwards the event to neighbouring
    brokers, and records the message id so duplicates are dropped.
    """
    if message.id in self.publish_history:
        # Already seen this message id — do not re-publish.
        log_to_file("already published")
        return
    self.publish.notify(message.event, message.message, BROKER_NAME)
    self.match_events_broker(message, addr)
    self.publish_history.append(message.id)
def build_predictions():
    """Run the prediction pipeline over scheduled matches that have no
    prediction yet and write the results back to the database."""
    collection = get_matches_collection()
    pending = collection.count_documents({
        "status": "Scheduled",
        "prediction": {
            "$exists": False
        }
    })
    if pending == 0:
        # No new match to predict
        log_to_file("No new match to predict", PREDICT_LOGS)
        return
    model_pipeline = load("tennis_prediction.joblib")
    unpredicted = q_get_unpredicted_matches()
    unpredicted = unpredicted.astype(get_match_dtypes(unpredicted))
    predicted = get_predictions(unpredicted, model_pipeline)
    combined = pd.concat([unpredicted, predicted], axis=1)
    for match_json in get_embedded_matches_json(combined):
        q_update_match(match_json)
def match_events_broker(self, msg, addr):
    """Forward *msg* to every neighbouring broker except the sender
    (identified by addr[0])."""
    messenger = Messenger()
    sender_ip = addr[0]
    for neighbour_ip in self.neighbour:
        if neighbour_ip == sender_ip:
            continue
        log_to_file(str(neighbour_ip) + "!=" + str(sender_ip))
        messenger.send_message(msg, neighbour_ip)
def update_match(match):
    """Persist an updated match series to the database, logging the
    outcome; failures are swallowed after being logged."""
    try:
        serialized = get_embedded_matches_json(pd.DataFrame(match).T)
        q_update_match(serialized[0])
        log_to_file("match '{0}' has been updated".format(match["_id"]),
                    MATCHES_LOGS)
    except Exception as ex:
        failure = "match '{0}' couldn't be updated".format(match["match_id"])
        log_to_file(failure, MATCHES_ERROR_LOGS)
        log("scrap_match", failure, type(ex).__name__)
def process_match_row(elem, matches_date):
    """Process one flashscore match row: create, update or delete the
    corresponding match document depending on its current status.

    elem -- selenium element of the match row (id looks like 'x_y_<match_id>')
    matches_date -- date of the day being scrapped, used to build the
                    full datetime of scheduled matches
    """
    elem_id = elem.get_attribute("id")
    match_id_regex = re.search("^._._(.*)$", elem_id)
    match_id = match_id_regex.group(1)
    match_status = find_match_status(elem)
    if match_status is None:
        msg = "Status not found for match '{0}'".format(match_id)
        log_to_file(msg, MATCHES_ERROR_LOGS)
        # BUG FIX: previously logged the MATCHES_ERROR_LOGS constant
        # instead of the message itself.
        log("status", msg)
        return
    match_found = q_find_match_by_id(match_id)
    if match_found is not None:
        # Match exists
        if MatchStatus[match_found["status"]] not in [MatchStatus.Finished,
                                                      MatchStatus.Retired,
                                                      MatchStatus.Awarded]:
            # Match is not recorded as 'finished' in database
            if match_status in [MatchStatus.Finished, MatchStatus.Retired,
                                MatchStatus.Live, MatchStatus.Awarded]\
                    or (match_status == MatchStatus.Interrupted
                        and MatchStatus[match_found["status"]] != MatchStatus.Interrupted):
                # Match is finished or live -> re-scrap and update
                match = scrap_match_flashscore(match_id, match_status)
                # BUG FIX: scrap_match_flashscore returns None on scraping
                # failure; guard before indexing (mirrors the create path).
                if match is None:
                    return
                match["_id"] = match_found["_id"]
                update_match(match)
            elif match_status in [MatchStatus.Walkover, MatchStatus.Cancelled]:
                # Match has been canceled
                delete_match(match_found["_id"])
                print("Delete match '{0}'".format(match_id))
            elif match_status == MatchStatus.Scheduled:
                # Updating match datetime if changed
                time_elem = elem.find_element_by_class_name("event__time").text
                time_regex = re.search(r"(\d{2}):(\d{2})$", time_elem)
                hour = int(time_regex.group(1))
                minute = int(time_regex.group(2))
                match_date = datetime(matches_date.year, matches_date.month,
                                      matches_date.day, hour, minute)
                if match_found["datetime"] != match_date:
                    match_dict = {'datetime': match_date,
                                  '_id': match_found["_id"]}
                    match = pd.Series(match_dict)
                    update_match(match)
    else:
        # Match doesn't exist
        match = None
        if match_status not in [MatchStatus.Walkover, MatchStatus.Cancelled]:
            # Scrap match preview
            match = scrap_match_flashscore(match_id, match_status)
        if match is None:
            return
        create_match(match)
def create_tournament(tournament):
    """Insert a new tournament document, logging success or failure."""
    created = q_create_tournament(tournament.to_dict())
    if created is None:
        msg = "couldn't create tournament '{0}'".format(tournament["flash_id"])
        log_to_file(msg, TOURNAMENT_LOGS)
        log("create_tournament", msg)
        return
    log_to_file(
        "tournament '{0}' has been created".format(tournament["flash_id"]),
        TOURNAMENT_LOGS)
def create_match(match):
    """Insert a new match document; failures are logged, not raised."""
    try:
        serialized = get_embedded_matches_json(pd.DataFrame(match).T)
        if not q_create_match(serialized[0]):
            raise Exception("Match not created")
        log_to_file("match '{0}' has been created".format(match["match_id"]),
                    MATCHES_LOGS)
    except Exception as ex:
        failure = "match '{0}' couldn't be created".format(match["match_id"])
        log_to_file(failure, MATCHES_ERROR_LOGS)
        log("scrap_match", failure, type(ex).__name__)
def update_tournament(tournament):
    """Persist an updated tournament series; failures are logged only."""
    try:
        serialized = get_dataframe_json(pd.DataFrame(tournament).T)
        q_update_tournament(serialized[0])
        log_to_file(
            "tournament '{0}' has been updated".format(tournament["_id"]),
            TOURNAMENT_LOGS)
    except Exception as ex:
        msg = "tournament '{0}' couldn't be updated".format(
            tournament["flash_id"])
        log_to_file(msg, TOURNAMENT_LOGS)
        log("tournament_update", msg, type(ex).__name__)
def retrieve_player_rank_info(player_id, all_player_ranks=None):
    """Retrieve player rank and rank_points.

    When *all_player_ranks* is not supplied, the full ranking table is
    fetched first.  Returns (rank, rank_points), or (None, None) when
    the player is not present exactly once in the table.
    """
    if all_player_ranks is None:
        all_player_ranks = retrieve_all_player_ranks()
    matched = all_player_ranks[all_player_ranks["player_id"] == player_id]
    if len(matched.index) != 1:
        msg = "Player rank info not found for player '{0}'".format(player_id)
        log_to_file(msg, RANKS_LOGS)
        log("player_rank", msg)
        return None, None
    row = matched.iloc[0]
    return row["rank"], row["rank_points"]
def notify_subscribers(self, recipient, message, broker_name, subject="Email alert from CDS"):
    """Send an email alert to *recipient* (iterable of addresses) via
    gmail SMTP.

    All failures are caught and reported on stdout so a broken mail
    setup never brings the broker down.
    """
    try:
        log_to_file(",".join(recipient)+" "+message)
        # Resolve this host's IP from /etc/hosts to include in the body.
        ip = subprocess.check_output(" awk 'END{print $1}' /etc/hosts ",
                                     encoding='utf-8',
                                     stderr=subprocess.STDOUT,
                                     shell=True)
        server = smtplib.SMTP_SSL('smtp.gmail.com', 465)
        try:
            server.ehlo()
            server.login(SENDER, PASSWORD)
            email_text = """Subject: %s\n%s \n Broker name - %s\n Broker IP - %s""" % (subject, message, broker_name, str(ip))
            server.sendmail(SENDER, recipient, email_text)
        finally:
            # BUG FIX: close the connection even when login/sendmail
            # raises; previously a failure leaked the SMTP socket.
            server.close()
        print("Email sent to "+str(recipient), file=sys.stderr)
    except Exception as e:
        print('Something went wrong...', e)
def process_message(self, msg, addr):
    """Dispatch an incoming message to the matching broker action."""
    log_to_file(str(msg.__dict__))
    action = msg.action
    if action == msg_util.SUBSCRIBE:
        self.add_subscriber(msg, addr)
    elif action == msg_util.PUBLISH:
        self.match_and_notify_subscriber(msg, addr)
    elif action == "show":
        mail_list = self.db.get_mail_list_for_event(msg.event, BROKER_NAME)
        log_to_file("".join(mail_list))
    elif action == "neighbours":
        log_to_file(str(self.neighbour))
    else:
        # Unknown action: log the raw message for debugging.
        log_to_file("not identifies" + str(msg.__dict__))
def scrap_player_id(player_name):
    """Search atptour.com for *player_name* via its player-search endpoint.

    Returns (atptour_name, atptour_id); both are None when nothing is
    found.  For long compound names with no result, retries once with
    just "<first> <last>".
    """
    atptour_name = atptour_id = None
    driver = get_chrome_driver()
    match_url = 'https://www.atptour.com/en/-/ajax/playersearch/PlayerUrlSearch?searchTerm={}'.format(
        player_name)
    driver.get(match_url)
    time.sleep(1)
    # The endpoint answers JSON rendered inside a <pre> tag.
    html = driver.find_element_by_tag_name("pre").get_attribute('innerHTML')
    json_obj = json.loads(html)
    elements = json_obj["items"]
    player_element = None
    if len(elements) == 0:
        names = player_name.split()
        if len(names) > 2:
            # Retry with first and last name only (drops middle names).
            minimized_name = names[0] + " " + names[-1]
            driver.quit()
            return scrap_player_id(minimized_name)
        msg = "'{0}' not found on atptour website".format(player_name)
        log_to_file(msg, PLAYER_LOGS)
        log("players", msg)
    else:
        # Prefer an exact case-insensitive match on the display name...
        for element in elements:
            if str.lower(element["Key"]) == str.lower(player_name):
                player_element = element
                break
        # ...otherwise fall back to the first search result.
        if player_element is None:
            player_element = elements[0]
        atptour_name = player_element["Key"]
        href = player_element["Value"]
        # The profile URL ends with /<atp_id>/overview.
        href_regex = re.search(".+/(.*)/overview$", href)
        atptour_id = href_regex.group(1)
    driver.quit()
    return atptour_name, atptour_id
def add_player_info(match):
    """Add p1 and p2 attributes to a match series.

    Looks both players up in the database, scraping and creating them
    when missing, then copies their bio/rank attributes onto *match*.
    Mutates *match* in place; returns None.
    """
    p1 = find_player_by_id(match["p1_id"])
    if p1 is None:
        p1 = scrap_new_player(match["p1_id"], match["p1_url"])
        # NOTE(review): if scrap_new_player returned None this passes
        # None straight to create_player — presumably tolerated there;
        # confirm against create_player's implementation.
        create_player(p1)
    p2 = find_player_by_id(match["p2_id"])
    if p2 is None:
        p2 = scrap_new_player(match["p2_id"], match["p2_url"])
        create_player(p2)
    if p1 is None or p2 is None:
        # Could neither find nor scrap at least one player: leave the
        # match series without player attributes.
        msg = "Couldn't find nor scrap players for match '{0}'".format(
            match["match_id"])
        log_to_file(msg, PLAYER_LOGS)
        log("players", msg)
        return
    match["p1_hand"] = p1["handedness"]
    match["p1_backhand"] = p1["backhand"]
    match["p1_ht"] = p1["height"]
    match["p1_weight"] = p1["weight"]
    match["p1_age"] = calculate_age(p1["birth_date"])
    match["p1_rank"], match["p1_rank_points"] = retrieve_player_rank_info(
        p1["atp_id"])
    match["p1_birth_country"] = p1["birth_country"]
    match["p1_residence_country"] = p1["residence_country"]
    match["p2_hand"] = p2["handedness"]
    match["p2_backhand"] = p2["backhand"]
    match["p2_ht"] = p2["height"]
    match["p2_weight"] = p2["weight"]
    match["p2_age"] = calculate_age(p2["birth_date"])
    match["p2_rank"], match["p2_rank_points"] = retrieve_player_rank_info(
        p2["atp_id"])
    match["p2_birth_country"] = p2["birth_country"]
    match["p2_residence_country"] = p2["residence_country"]
def feature_engineer():
    """Compute engineered features for matches that don't have them yet
    and store the results back into the database."""
    collection = get_matches_collection()
    if collection.count_documents({"features": {"$exists": False}}) == 0:
        # No new match to build features
        log_to_file("No new match to build features", PREDICT_LOGS)
        return
    pending = q_get_unfeatured_matches()
    pending = pending.astype(get_match_dtypes(pending))
    history = q_get_past_matches()
    history = history.astype(get_match_dtypes(history))
    engineered = add_features(pending, history)
    merged = pd.concat([pending[["_id"]], engineered], axis=1)
    for match_json in get_embedded_matches_json(merged):
        q_update_match(match_json)
def publish_event(self, event, message, table='events'):
    """Publish *event* carrying *message* through a randomly selected
    broker."""
    log_to_file("publish"+event+table)
    target_broker = BrokerManager().get_random_broker()
    publish_msg = message_utility.get_publish_msg(event, message)
    Messenger().send_message(publish_msg, target_broker)
def search_all_tournaments_atptour():
    """Scrape atptour.com's tournaments page into a DataFrame.

    Returns a DataFrame with columns atp_id, atp_name,
    atp_formatted_name, city, country, start_date, end_date — or None
    when the page-level scrape fails.  Per-row parse failures append
    None placeholders so the parallel lists stay aligned.
    """
    tournaments_atptour = None
    driver = get_chrome_driver()
    driver.get("https://www.atptour.com/en/tournaments")
    time.sleep(3)
    try:
        # --- Name / id column: href encodes formatted name and atp id.
        atp_names = []
        atp_formatted_names = []
        atp_ids = []
        elements = driver.find_elements_by_xpath(
            "//tr[@class='tourney-result']/td[2]/a")
        for elem in elements:
            try:
                url = elem.get_attribute("href")
                url_regex = re.search("/tournaments/(.*)/(.*)/overview$", url)
                atp_formatted_name = url_regex.group(1)
                atp_id = int(url_regex.group(2))
                atp_name = elem.text
                atp_formatted_names.append(atp_formatted_name)
                atp_ids.append(atp_id)
                atp_names.append(atp_name)
            except Exception as ex:
                # Keep lists aligned with None placeholders on failure.
                atp_formatted_names.append(None)
                atp_ids.append(None)
                atp_names.append(None)
                msg = "atp tournaments retrieval error, tournament '{0}'".format(
                    elem.text)
                log_to_file(msg, TOURNAMENT_LOGS)
                log("tournaments", msg, type(ex).__name__)
        # --- Location column: "City, Country" (country = last token).
        cities = []
        countries = []
        elements = driver.find_elements_by_xpath(
            "//tr[@class='tourney-result']/td[2]/span[1]")
        for elem in elements:
            location = elem.text
            try:
                matched_location = location.split(", ")
                city = matched_location[0]
                country = matched_location[-1]
                cities.append(city)
                countries.append(country)
            except Exception as ex:
                cities.append(None)
                countries.append(None)
                msg = "atp tournaments retrieval error, location '{0}'".format(
                    location)
                log_to_file(msg, TOURNAMENT_LOGS)
                log("tournaments", msg, type(ex).__name__)
        # --- Date column: "YYYY.MM.DD - YYYY.MM.DD"; end date is made
        # exclusive by adding one day.
        start_dates = []
        end_dates = []
        elements = driver.find_elements_by_xpath(
            "//tr[@class='tourney-result']/td[2]/span[2]")
        for elem in elements:
            date_elem = elem.text
            try:
                date_regex = re.search("^(.*) - (.*)$", date_elem)
                start_date_str = date_regex.group(1)
                start_date = datetime.strptime(start_date_str, '%Y.%m.%d')
                end_date_str = date_regex.group(2)
                end_date = datetime.strptime(end_date_str, '%Y.%m.%d')
                end_date += timedelta(days=1)
                start_dates.append(start_date)
                end_dates.append(end_date)
            except Exception as ex:
                start_dates.append(None)
                end_dates.append(None)
                #print(type(ex).__name__)
                #print("atp tournaments retrieval error, date_elem: '{0}'".format(date_elem))
        tournaments_atptour = pd.DataFrame({
            "atp_id": atp_ids,
            "atp_name": atp_names,
            "atp_formatted_name": atp_formatted_names,
            "city": cities,
            "country": countries,
            "start_date": start_dates,
            "end_date": end_dates
        })
    except Exception as ex:
        msg = "Tournament header retrieval error"
        log_to_file(msg, TOURNAMENT_LOGS)
        log("tournaments", msg, type(ex).__name__)
    driver.quit()
    return tournaments_atptour
def scrap_all_player_ranks(log_file_path, pickle_db_path):
    """Scrape the full ATP singles ranking (1-5000) and record it.

    Only scrapes when atptour's latest ranking date equals today; the
    scrape date is then remembered in a pickledb store under
    'update_player_ranks_date'.  All outcomes are logged; nothing is
    returned.
    """
    driver = get_chrome_driver()
    try:
        driver.get("https://www.atptour.com/en/rankings/singles")
        date_str = driver.find_element_by_xpath(
            "//div[@class='dropdown-wrapper']/div[1]/div/div").text
        last_ranking_date = datetime.strptime(date_str, '%Y.%m.%d').date()
        today = date.today()
        if last_ranking_date != today:
            # Check if last ranking date on atptour match current date.
            # If not, do not scrap (ValueError is the skip signal below).
            raise ValueError()
        # NOTE(review): get_chrome_driver is called again with the live
        # driver — presumably recycles/replaces it; confirm its contract.
        driver = get_chrome_driver(driver)
        driver.get(
            "https://www.atptour.com/en/rankings/singles?rankDate={0}&rankRange=1-5000"
            .format(date_str.replace(".", "-")))
        ranks = []
        rank_elems = driver.find_elements_by_class_name("rank-cell")
        for rank_elem in rank_elems:
            rank_str = rank_elem.text
            # Some low-level players has rank suffixed with T because
            # they are ex-aequo
            rank_str = rank_str.replace("T", "")
            rank = int(rank_str)
            ranks.append(rank)
        points_elems = driver.find_elements_by_xpath(
            "//td[@class='points-cell']/a")
        rank_points = [points.text for points in points_elems]
        # Points use thousands separators, e.g. "1,234".
        rank_points = [int(points.replace(",", "")) for points in rank_points]
        player_ids = []
        player_elems = driver.find_elements_by_xpath(
            "//td[@class='player-cell']/span[1]/a[1]")
        for elem in player_elems:
            href = elem.get_attribute("href")
            # Profile URL: players/<name>/<atp_id>/overview
            player_id_regex = re.search("players/.*/(.*)/overview", href)
            player_ids.append(player_id_regex.group(1))
        player_ranks = pd.DataFrame({
            "rank": ranks,
            "player_id": player_ids,
            "rank_points": rank_points
        })
        if record_all_player_ranks(player_ranks):
            log_to_file("Player ranks successfully updated", log_file_path)
            db = pickledb.load(pickle_db_path, True)
            db.set("update_player_ranks_date", date_str)
        else:
            raise Exception('Player ranks not recorded')
    except ValueError:
        # Ranking not refreshed on atptour yet — silently skip.
        # log_to_file("Player ranks not updated on atptour", log_file_path)
        pass
    except Exception as ex:
        log_to_file("player_ranks update error", log_file_path)
        log("Player_ranks", str(ex))
        pass
    driver.quit()
import os import message_utility as msg_util from publish import Publish from db import DB import subprocess from msg_sender import Messenger from log import log_to_file NEIGHBORS = os.environ["NEIGHBORS"] BROKER_NAME = os.environ['BROKER_NAME'] log_to_file("Broker ID" + BROKER_NAME) class Broker: """ Broker performs tasks as accept subscribe and publish request. """ def __init__(self): self.publish = Publish() self.db = DB() self.db.create_table_if_not_exists(BROKER_NAME) self.neighbour = {} self.publish_history = [] self.set_network_table_list() # ------------ network table ----------------------------------- def set_network_table_list(self, file_name="neighbours.txt"): self.neighbour = {} """ with open(os.path.join('info',file_name), 'r') as f: for line in f.readlines():
def get_tournament_from_row(driver, elem, matches_date): tournament = None # Look for atp-singles tournaments only -> ignore others category = elem.find_element_by_class_name("event__title--type").text if category != "ATP - SINGLES": return None name = elem.find_element_by_class_name("event__title--name").text # Check if tournament matches are in qualification stage -> ignore qualifications qualification_regex = re.search("Qualification", name) if qualification_regex: return None tournament_name_regex = re.search(r"^([^(]*) \(([^)]*)\)", name) tournament_name = tournament_name_regex.group(1) tournament_country = tournament_name_regex.group(2) tournament_found = find_tournament_by_name(tournament_name) if tournament_found is not None: # Tournament exists if tournament_found["start_date"].year != datetime.now().year: # Tournament to be updated tournament = scrap_tournament(tournament_found, matches_date) if tournament is not None: log_to_file("updating tournament {0}".format(tournament["flash_id"]), TOURNAMENT_LOGS) update_tournament(tournament) else: # Tournament exists and is up-to-date tournament = tournament_found else: # New tournament to be scrapped if tournament_name.startswith("Davis Cup"): # print("Ignoring Davis Cup") return None # Look for tournament id in tournaments menu flash_tournaments = get_flash_tournaments_from_menu(driver) tournament_matched = flash_tournaments[flash_tournaments["name"] == tournament_name] if len(tournament_matched.index) != 1: msg = "Couldn't find flashscore tournament id for '{0}'".format(tournament_name) log_to_file(msg, TOURNAMENT_LOGS) log("tournaments", msg) return None tournament_id = tournament_matched.iloc[0]["flash_id"] tournament_scrapped = scrap_tournament(pd.Series( {"flash_id": tournament_id, "flash_name": tournament_name, "country": tournament_country } ), matches_date) if tournament_scrapped is not None: create_tournament(tournament_scrapped) tournament = tournament_scrapped return tournament
def delete_match(_id):
    """Delete the match identified by *_id*, logging when it fails."""
    if q_delete_match(_id) is None:
        failure = "match '{0}' not deleted".format(_id)
        log_to_file(failure, MATCHES_ERROR_LOGS)
        log("match_delete", failure)
def scrap_player(atp_id):
    """Scrape a player's profile page on atptour.com.

    Returns a pandas Series with bio attributes (names, birth date,
    physique, countries, handedness, ...), or None when the page-level
    scrape fails.  Individual missing fields are tolerated and left as
    None.
    """
    driver = get_chrome_driver()
    match_url = 'https://www.atptour.com/en/players/player/{}/overview'.format(
        atp_id)
    driver.get(match_url)
    time.sleep(0.5)
    player = pd.Series(dtype='float64')
    try:
        player["first_name"] = driver.find_element_by_xpath(
            "//div[@class='player-profile-hero-name']/div[1]").text
        player["last_name"] = driver.find_element_by_xpath(
            "//div[@class='player-profile-hero-name']/div[2]").text
        player["first_initial"] = player["first_name"][0] if player["first_name"] is not None \
            and player["first_name"] != "" else None
        # full_name format is "<last_name> <first_initial>".
        player["full_name"] = "{0} {1}".format(player["last_name"],
                                               player["first_initial"])
        birth_date = None
        try:
            # Birthday text looks like "(YYYY.MM.DD)".
            birth_date_search = driver.find_element_by_xpath(
                "//span[@class='table-birthday']").text
            birth_regex = re.search(r"^\(([0-9]*)\.([0-9]*)\.([0-9]*)\)$",
                                    birth_date_search)
            birth_year = birth_regex.group(1)
            birth_month = birth_regex.group(2)
            birth_day = birth_regex.group(3)
            birth_date = datetime(int(birth_year), int(birth_month),
                                  int(birth_day))
        except Exception as exc:
            print("problem date")
        player["birth_date"] = birth_date
        turned_pro = None
        try:
            turned_pro_str = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-overflow']/div[2]/div[1]/table/tbody/tr[1]/td[2]/div/div[2]"
            ).text
            turned_pro = int(turned_pro_str)
        except (NoSuchElementException, ValueError):
            pass
        player["turned_pro"] = turned_pro
        weight = None
        try:
            weight_str = driver.find_element_by_xpath(
                "//span[@class='table-weight-lbs']").text
            weight = int(weight_str)
        except (NoSuchElementException, ValueError):
            pass
        height = None
        try:
            # Height text looks like "(NNNcm)".
            height_str = driver.find_element_by_xpath(
                "//span[@class='table-height-cm-wrapper']").text
            height_regex = re.search(r"^\(([0-9]*)cm\)$", height_str)
            if height_regex:
                height = int(height_regex.group(1))
        except (NoSuchElementException, ValueError, TypeError):
            pass
        player["weight"] = weight
        player["height"] = height
        flag_code = driver.find_element_by_xpath(
            "//div[@class='player-flag-code']").text
        player["flag_code"] = flag_code
        birth_city = birth_country = None
        try:
            birth_place = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-overflow']/div[2]/div["
                "1]/table/tbody/tr[2]/td[1]/div/div[2]").text
            b_matched_location = birth_place.split(", ")
            if len(b_matched_location) > 1:
                birth_city = b_matched_location[0]
                birth_country = b_matched_location[-1]
                # Unknown country names are treated like a missing element.
                if not country_exists(birth_country):
                    raise NoSuchElementException("birth_country_not_found")
            else:
                raise NoSuchElementException("birth_country_not_found")
        except NoSuchElementException:
            pass
            # Couldn't find player birth place, Setting birth_country
            # with flag_code
            birth_country = find_country_with_flag_code(flag_code)
            if birth_country is None:
                msg = "Couldn't find birth country for player '{0}'".format(
                    atp_id)
                log_to_file(msg, PLAYER_LOGS)
                log("players", msg)
        player["birth_city"] = birth_city
        player["birth_country"] = birth_country
        residence_city = residence_country = None
        try:
            residence = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-overflow']/div[2]/div["
                "1]/table/tbody/tr[2]/td[2]/div/div[2]").text
            r_matched_location = residence.split(", ")
            if len(r_matched_location) > 1:
                residence_city = r_matched_location[0]
                residence_country = r_matched_location[-1]
        except NoSuchElementException:
            pass
        player["residence_city"] = residence_city
        player["residence_country"] = residence_country
        handedness = backhand = None
        try:
            # Plays cell looks like "Right-Handed, Two-Handed Backhand".
            hands = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-overflow']/div[2]/div["
                "1]/table/tbody/tr[2]/td[3]/div/div[2]").text
            hands_matched = hands.split(", ")
            if len(hands_matched) > 1:
                handedness = hands_matched[0]
                backhand = hands_matched[-1]
        except NoSuchElementException:
            pass
        player["handedness"] = handedness
        player["backhand"] = backhand
    except Exception as ex:
        # Page-level failure: return None instead of a partial series.
        player = None
        msg = "Couldn't scrap player : atp_id= '{}'".format(atp_id)
        log_to_file(msg, PLAYER_LOGS)
        log("players", msg)
        print(type(ex))
    driver.quit()
    return player
def scrap_match_flashscore(match_id, status):
    """Scrape a single match page on flashscore.com.

    Builds a pandas Series keyed 'match_id' plus tournament, players,
    datetime, status, and — for finished/live matches — score and
    statistics fields.  Returns None when scraping fails.
    """
    match = pd.Series([match_id], index=["match_id"])
    driver = get_chrome_driver()
    try:
        match["match_id"] = match_id
        match_url = "https://www.flashscore.com/match/" + match_id
        driver.get(match_url)
        time.sleep(1)
        # Tournament link in the header encodes the tournament id.
        tournament_elem = driver.find_element_by_xpath(
            "//div[contains(@class, 'tournamentHeaderDescription')]/div[1]/span[3]/a"
        )
        tournament_regex = re.search("atp-singles/(.*)/",
                                     tournament_elem.get_attribute("href"))
        match["tournament_id"] = tournament_regex.group(1)
        add_tournament_info(match)
        # Round is the text after the last ' - ' in the header.
        round_regex = re.search(",.*- (.*)$", tournament_elem.text)
        if round_regex:
            match["round"] = round_regex.group(1)
        else:
            match["round"] = "Group"
        match["p1_id"], match["p1_url"], match["p2_id"], match["p2_url"] = scrap_player_ids(driver)
        add_player_info(match)
        # URLs were only needed by add_player_info; drop them.
        match.drop(columns=["p1_url", "p2_url"], inplace=True)
        match_date = None
        try:
            # Header date format: "DD.MM.YYYY HH:MM".
            match_date_elem = driver.find_element_by_xpath("//div[@id='detail']/div[4]/div[1]").text
            match_date_regex = re.search(r"^([0-9]+)\.([0-9]+)\.([0-9]+) ([0-9]+):([0-9]+)$", match_date_elem)
            day = int(match_date_regex.group(1))
            month = int(match_date_regex.group(2))
            year = int(match_date_regex.group(3))
            hour = int(match_date_regex.group(4))
            minute = int(match_date_regex.group(5))
            match_date = pd.to_datetime("{0} {1} {2} {3} {4}".format(year, month, day, hour, minute)
                                        , format='%Y %m %d %H %M', utc=True)
        except Exception as ex:
            msg = "Error with date format - scraping match '{}'".format(match_id)
            log_to_file(msg, MATCHES_ERROR_LOGS)
            log("scrap_match", msg, type(ex).__name__)
            # Re-raise so the outer handler returns None.
            raise Exception
        match["datetime"] = match_date
        ''' Section usefull for scrap_tournament_matches()
        if status is None:
            status_elem = driver.find_element_by_xpath("//div[@id='detail']/div[4]/div[3]/div[1]/div[2]/span[1]").text
            if status_elem == "Finished":
                status = MatchStatus.Finished
            else:
                retired_regex = re.search("retired", status_elem)
                if retired_regex:
                    status = MatchStatus.Retired
                else:
                    msg = "status_error - match '{}'".format(match_id)
                    log_to_file(msg, MATCHES_ERROR_LOGS)
                    log("scrap_match", msg)
                    driver.quit()
                    return None
        '''
        match["status"] = status.name
        if status in [MatchStatus.Finished, MatchStatus.Retired,
                      MatchStatus.Live, MatchStatus.Awarded,
                      MatchStatus.Interrupted]:
            if status != MatchStatus.Live:
                # Set match winner only if match has already finished:
                # the winner's name is rendered in <strong>.
                participant_elems = driver.find_elements_by_xpath("//a[starts-with(@class, 'participantName___')]")
                if len(participant_elems[-1].find_elements_by_xpath("strong")) == 1:
                    match["p1_wins"] = False
                else:
                    match["p1_wins"] = True
                # Overall duration "H:MM" converted to minutes.
                duration_elem = driver.find_element_by_xpath("//div[contains(@class, 'time--overall')]").text
                duration_regex = re.search("([0-9]+):([0-9]+)", duration_elem)
                match["minutes"] = int(duration_regex.group(1)) * 60 + int(duration_regex.group(2))
            # Per-set games and tie-break scores for both players.
            match["p1_s1_gms"], match["p1_tb1_score"] = find_gms_value(1, 1, driver)
            match["p1_s2_gms"], match["p1_tb2_score"] = find_gms_value(1, 2, driver)
            match["p1_s3_gms"], match["p1_tb3_score"] = find_gms_value(1, 3, driver)
            match["p1_s4_gms"], match["p1_tb4_score"] = find_gms_value(1, 4, driver)
            match["p1_s5_gms"], match["p1_tb5_score"] = find_gms_value(1, 5, driver)
            match["p2_s1_gms"], match["p2_tb1_score"] = find_gms_value(2, 1, driver)
            match["p2_s2_gms"], match["p2_tb2_score"] = find_gms_value(2, 2, driver)
            match["p2_s3_gms"], match["p2_tb3_score"] = find_gms_value(2, 3, driver)
            match["p2_s4_gms"], match["p2_tb4_score"] = find_gms_value(2, 4, driver)
            match["p2_s5_gms"], match["p2_tb5_score"] = find_gms_value(2, 5, driver)
            # Open the Statistics tab and collect label/p1/p2 triples.
            driver.find_element_by_link_text("Statistics").click()
            time.sleep(0.5)
            row_elements = driver.find_elements_by_xpath("//div[starts-with(@class, 'statRow___')]")
            # stat_elem.find_elements_by_class_name("statRow")
            stat_labels = []
            p1_stats = []
            p2_stats = []
            for row_elem in row_elements:
                stat_labels.append(row_elem.find_element_by_xpath("div[1]/div[2]").text)
                p1_stats.append(row_elem.find_element_by_xpath("div[1]/div[1]").text)
                p2_stats.append(row_elem.find_element_by_xpath("div[1]/div[3]").text)
            stats_dataframe = pd.DataFrame({"label": stat_labels, "p1": p1_stats, "p2": p2_stats})
            # Percentage stats are rendered as "NN% (won/total)".
            match["p1_ace"] = int(stats_dataframe[stats_dataframe["label"] == "Aces"].iloc[0]["p1"])
            match["p1_df"] = int(stats_dataframe[stats_dataframe["label"] == "Double Faults"].iloc[0]["p1"])
            p1_svpt_elem = stats_dataframe[stats_dataframe["label"] == "Service Points Won"].iloc[0]["p1"]
            p1_svpt_regex = re.search(r"([0-9]+)% \(([0-9]+)/([0-9]+)", p1_svpt_elem)
            match["p1_svpt"] = int(p1_svpt_regex.group(3))
            match["p1_svpt_won"] = int(p1_svpt_regex.group(2))
            match["p1_svpt_ratio"] = int(p1_svpt_regex.group(1)) / 100
            p1_1st_elem = stats_dataframe[stats_dataframe["label"] == "1st Serve Points Won"].iloc[0]["p1"]
            p1_1st_regex = re.search(r"([0-9]+)% \(([0-9]+)/([0-9]+)", p1_1st_elem)
            match["p1_1st_in"] = int(p1_1st_regex.group(3))
            match["p1_1st_won"] = int(p1_1st_regex.group(2))
            match["p1_1st_won_ratio"] = int(p1_1st_regex.group(1)) / 100
            p1_2nd_elem = stats_dataframe[stats_dataframe["label"] == "2nd Serve Points Won"].iloc[0]["p1"]
            p1_2nd_regex = re.search(r"([0-9]+)% \(([0-9]+)/([0-9]+)", p1_2nd_elem)
            match["p1_2nd_pts"] = int(p1_2nd_regex.group(3))
            match["p1_2nd_won"] = int(p1_2nd_regex.group(2))
            match["p1_2nd_won_ratio"] = int(p1_2nd_regex.group(1)) / 100
            p1_bp_elem = stats_dataframe[stats_dataframe["label"] == "Break Points Saved"].iloc[0]["p1"]
            p1_bp_regex = re.search(r"([0-9]+)% \(([0-9]+)/([0-9]+)", p1_bp_elem)
            match["p1_bp_faced"] = int(p1_bp_regex.group(3))
            match["p1_bp_saved"] = int(p1_bp_regex.group(2))
            match["p1_bp_saved_ratio"] = int(p1_bp_regex.group(1)) / 100
            match["p2_ace"] = int(stats_dataframe[stats_dataframe["label"] == "Aces"].iloc[0]["p2"])
            match["p2_df"] = int(stats_dataframe[stats_dataframe["label"] == "Double Faults"].iloc[0]["p2"])
            p2_svpt_elem = stats_dataframe[stats_dataframe["label"] == "Service Points Won"].iloc[0]["p2"]
            p2_svpt_regex = re.search(r"([0-9]+)% \(([0-9]+)/([0-9]+)", p2_svpt_elem)
            match["p2_svpt"] = int(p2_svpt_regex.group(3))
            match["p2_svpt_won"] = int(p2_svpt_regex.group(2))
            match["p2_svpt_ratio"] = int(p2_svpt_regex.group(1)) / 100
            p2_1st_elem = stats_dataframe[stats_dataframe["label"] == "1st Serve Points Won"].iloc[0]["p2"]
            p2_1st_regex = re.search(r"([0-9]+)% \(([0-9]+)/([0-9]+)", p2_1st_elem)
            match["p2_1st_in"] = int(p2_1st_regex.group(3))
            match["p2_1st_won"] = int(p2_1st_regex.group(2))
            match["p2_1st_won_ratio"] = int(p2_1st_regex.group(1)) / 100
            p2_2nd_elem = stats_dataframe[stats_dataframe["label"] == "2nd Serve Points Won"].iloc[0]["p2"]
            p2_2nd_regex = re.search(r"([0-9]+)% \(([0-9]+)/([0-9]+)", p2_2nd_elem)
            match["p2_2nd_pts"] = int(p2_2nd_regex.group(3))
            match["p2_2nd_won"] = int(p2_2nd_regex.group(2))
            match["p2_2nd_won_ratio"] = int(p2_2nd_regex.group(1)) / 100
            p2_bp_elem = stats_dataframe[stats_dataframe["label"] == "Break Points Saved"].iloc[0]["p2"]
            p2_bp_regex = re.search(r"([0-9]+)% \(([0-9]+)/([0-9]+)", p2_bp_elem)
            match["p2_bp_faced"] = int(p2_bp_regex.group(3))
            match["p2_bp_saved"] = int(p2_bp_regex.group(2))
            match["p2_bp_saved_ratio"] = int(p2_bp_regex.group(1)) / 100
            p1_sv_gms_elem = stats_dataframe[stats_dataframe["label"] == "Service Games Won"].iloc[0]["p1"]
            p1_sv_gms_rgx = re.search(r"([0-9]+)% \(([0-9]+)/([0-9]+)", p1_sv_gms_elem)
            match["p1_sv_gms"] = int(p1_sv_gms_rgx.group(3))
            match["p1_sv_gms_won"] = int(p1_sv_gms_rgx.group(2))
            match["p1_sv_gms_won_ratio"] = int(p1_sv_gms_rgx.group(1)) / 100
            p2_sv_gms_elem = stats_dataframe[stats_dataframe["label"] == "Service Games Won"].iloc[0]["p2"]
            p2_sv_gms_rgx = re.search(r"([0-9]+)% \(([0-9]+)/([0-9]+)", p2_sv_gms_elem)
            match["p2_sv_gms"] = int(p2_sv_gms_rgx.group(3))
            match["p2_sv_gms_won"] = int(p2_sv_gms_rgx.group(2))
            match["p2_sv_gms_won_ratio"] = int(p2_sv_gms_rgx.group(1)) / 100
            # Derived first-serve-in ratios; None when no service point.
            match["p1_1st_serve_ratio"] = match["p1_1st_in"] / match["p1_svpt"] if match["p1_svpt"] > 0 else None
            match["p2_1st_serve_ratio"] = match["p2_1st_in"] / match["p2_svpt"] if match["p2_svpt"] > 0 else None
    except Exception as ex:
        msg = "Error while scraping match id '{}'".format(match_id)
        log_to_file(msg, MATCHES_ERROR_LOGS)
        log("scrap_match", msg, type(ex).__name__)
        match = None
    driver.quit()
    return match
def search_tournament_atptour(tournament, date_of_matches):
    """Resolve the atptour.com identity (atp_id / atp_formatted_name) of a tournament.

    tournament: record with at least "flash_id" and "flash_name"; may already carry
        "atp_id" / "atp_formatted_name" from a previous resolution.  It exposes
        ``.index`` membership, so it is presumably a pandas Series — TODO confirm.
    date_of_matches: date used as a fallback key (matched against the atptour
        tournament start/end dates) when name lookup fails.

    Returns the (possibly updated, mutated in place) tournament record, or None
    when no unambiguous match is found on atptour.  Every "not found" path is
    logged via log_to_file/log before returning None.
    """
    flash_id = tournament["flash_id"]
    # Full listing of tournaments currently known on atptour (project helper).
    tournaments_atptour = search_all_tournaments_atptour()
    # Tournament already exists - Checking if it has kept same references on atptour
    if "atp_id" in tournament.index and "atp_formatted_name" in tournament.index:
        atp_id = tournament["atp_id"]
        atp_formatted_name = tournament["atp_formatted_name"]
        tour_matched = tournaments_atptour[
            (tournaments_atptour["atp_id"] == atp_id)
            & (tournaments_atptour["atp_formatted_name"] == atp_formatted_name)]
        # Tournament has kept same references: nothing to update.
        if len(tour_matched.index) == 1:
            return tournament
        # Tournament has new references (changed atp_id)
        tour_matched = tournaments_atptour[
            tournaments_atptour["atp_formatted_name"] == atp_formatted_name]
        if len(tour_matched.index) == 1:
            # Tournament kept the same formatted_name but got a new atp_id.
            new_atp_id = tour_matched.iloc[0]["atp_id"]
            log_to_file(
                "Tournament '{0}' changed atp_id from '{1}' to '{2}'".format(
                    flash_id, atp_id, new_atp_id), TOURNAMENT_LOGS)
            tournament["atp_id"] = new_atp_id
            return tournament
        # Tournament has new references (changed atp_id and atp_formatted_name)
        tournament_name = get_tournament_name(tournament["flash_name"])
        tour_matched = tournaments_atptour[tournaments_atptour["atp_name"] ==
                                           tournament_name]
        if len(tour_matched.index) == 1:
            # Matched by display name: BOTH atp_id and atp_formatted_name changed,
            # so update and log both of them.
            new_atp_id = tour_matched.iloc[0]["atp_id"]
            new_formatted_name = tour_matched.iloc[0]["atp_formatted_name"]
            log_to_file(
                "Tournament '{0}' changed atp_id from '{1}' to '{2}'".format(
                    flash_id, atp_id, new_atp_id), TOURNAMENT_LOGS)
            log_to_file(
                "Tournament '{0}' changed atp_formatted_name from '{1}' to '{2}'"
                .format(flash_id, atp_formatted_name, new_formatted_name),
                TOURNAMENT_LOGS)
            tournament["atp_id"] = new_atp_id
            tournament["atp_formatted_name"] = new_formatted_name
            return tournament
        # Tournament new references not found
        else:
            msg = "Tournament '{0}' not found, atp_id: '{1}' and atp_formatted_name: '{2}'"\
                .format(flash_id, atp_id, atp_formatted_name)
            log_to_file(msg, TOURNAMENT_LOGS)
            log("tournament_not_found", msg)
            return None
    # New tournament (no atp references stored yet)
    else:
        tournament_name = get_tournament_name(tournament["flash_name"])
        country = tournament["country"]
        tour_matched = tournaments_atptour[tournaments_atptour["atp_name"] ==
                                           tournament_name]
        if len(tour_matched.index) != 1:
            # Tournament not found by name. Try to find tournament by start date, end date and country
            tour_matched = tournaments_atptour[
                (tournaments_atptour["start_date"] <= pd.Timestamp(
                    date_of_matches)) &
                (tournaments_atptour["end_date"] >= pd.Timestamp(
                    date_of_matches)) &
                (tournaments_atptour["country"] == country)]
        # New tournament references found
        if len(tour_matched.index) == 1:
            tournament["atp_id"] = tour_matched.iloc[0]["atp_id"]
            tournament["atp_formatted_name"] = tour_matched.iloc[0][
                "atp_formatted_name"]
            return tournament
        # New tournament references not found
        else:
            msg = "Tournament '{0}' not found".format(flash_id)
            log_to_file(msg, TOURNAMENT_LOGS)
            log("tournament_not_found", msg)
            return None
import socket import pickle import _thread from broker import Broker from log import log_to_file """ Listener demon: listens to message continuouly """ s = socket.socket() print("Socket successfully created") port = 12345 s.bind(('', port)) s.listen(5) broker = Broker() log_to_file("broker started") while True: # Establish connection with client. c, addr = s.accept() log_to_file('Got connection from' + str(addr)) order = c.recv(1024) message = pickle.loads(order) _thread.start_new_thread(broker.process_message, (message, addr)) # Close the connection with the client c.close()
def main():
    """Parse CLI arguments for the RNN encoder-decoder and validate the commands.

    NOTE(review): this is Python 2 code (`print args` statement and the
    tuple-unpacking `lambda (name, handler)` below are Python-2-only syntax).
    """
    # mapping of commands to handlers
    valid_commands = [
        ('create', h_create),
        ('compile', h_compile),
        ('train', h_train),
        ('test', h_test),
        ('export', h_export),
        ('interactive', h_interactive),
    ]

    # create parser; '@file' on the command line reads additional args from a file
    parser = argparse.ArgumentParser(description="RNN Encoder Decoder",
                                     fromfile_prefix_chars='@')

    # global
    helpstr = "List of commands: " + ', '.join(
        [name for name, handler in valid_commands])
    parser.add_argument('commands', type=str, nargs='+', help=helpstr)

    # data
    parser.add_argument('--train-src', dest='train_src', type=str,
                        help="Training sentences for source (encoder) network")
    parser.add_argument('--train-dst', dest='train_dst', type=str,
                        help="Training sentences for destination (decoder) network")
    parser.add_argument('--train-both', dest='train_both', type=str,
                        help="Training sentences for both encoder and decoder network")
    parser.add_argument('--validation-src', dest='validation_src', type=str,
                        help="Validation sentences for source (encoder) network")
    parser.add_argument('--validation-dst', dest='validation_dst', type=str,
                        help="Validation sentences for destination (decoder) network")
    parser.add_argument('--validation-both', dest='validation_both', type=str,
                        help="Test sentences for both encoder and decoder network")
    parser.add_argument('--test-src', dest='test_src', type=str,
                        help="Test sentences for source (encoder) network")
    parser.add_argument('--test-dst', dest='test_dst', type=str,
                        help="Test sentences for destination (decoder) network")
    parser.add_argument('--test-both', dest='test_both', type=str,
                        help="Test sentences for both encoder and decoder network")

    # compile parameters
    parser.add_argument('--embedding-size', dest='embedding_size', type=int,
                        help="Embedding vector size")
    parser.add_argument('--layers', dest='layer_count', type=int,
                        help="Network layer count")
    parser.add_argument('--max-sentence-length', dest='maxlen', type=int,
                        help="Maximum sentence length")
    parser.add_argument('--optimizer', dest='optimizer', type=str,
                        default='adagrad',
                        help="Optimizer type (rmsprop, sgd, adadelta, adagrad)")
    parser.add_argument('--compile-train', dest='compile_train', type=str,
                        default='True',
                        help="Compile training functions for model")

    # train parameters
    parser.add_argument('--batch-size', dest='batch_size', type=int,
                        default=16, help="Training batch size")
    parser.add_argument('--validation-skip', dest='validation_skip', type=float,
                        default=10,
                        help="Amount of epochs to skip before outputting validation translations")
    parser.add_argument('--snapshot-skip', dest='snapshot_skip', type=float,
                        default=10,
                        help="Amount of epochs to skip between snapshots")
    parser.add_argument('--lr-encoder', dest='lr_encoder', type=float,
                        default=None, help="Learning rate for encoder")
    parser.add_argument('--lr-decoder', dest='lr_decoder', type=float,
                        default=None, help="Learning rate for decoder")
    parser.add_argument('--lr-both', dest='lr_both', type=float,
                        default=None, help="Learning rate for both")
    parser.add_argument('--epoch-start', dest='epoch_start', type=int,
                        default=0, help="Starting epoch")
    parser.add_argument('--sample-size', dest='sample_size', type=int,
                        default=200,
                        help="Sample size for validation loss/test+validation statistics (if <= 0: use the entire sets)")
    parser.add_argument('--show-multiple', dest='show_multiple', type=str,
                        default='false', help="Show top-N for each translation")

    # training thresholds
    parser.add_argument('--epochs', dest='epochs', type=int, default=None,
                        help="Cutoff for training (number of epochs)")
    parser.add_argument('--error', dest='error', type=float, default=None,
                        help="Cutoff for training (test and validation error)")
    parser.add_argument('--seconds', dest='seconds', type=float, default=None,
                        help="Cutoff for training (total seconds elapsed)")
    parser.add_argument('--loss', dest='loss', type=float, default=None,
                        help="Cutoff for training (test and validation loss)")

    # test parameters
    parser.add_argument('--format', dest='test_format', type=str, default='',
                        help="Test output format (options: pairs (default), simple, complex)")

    # logging
    parser.add_argument('--log-global', dest='log_glob', type=str,
                        help="Log file for all output")
    parser.add_argument('--log-info', dest='log_info', type=str,
                        help="Log file for updates (no data dumps)")
    parser.add_argument('--log-stat', dest='log_stat', type=str,
                        help="Log file for stats (validation accuracy, etc)")

    # models
    parser.add_argument('--embedding-src', dest='embedding_src', type=str,
                        help="Input filename for src embedding")
    parser.add_argument('--embedding-dst', dest='embedding_dst', type=str,
                        help="Input filename for dst embedding")
    parser.add_argument('--embedding-both', dest='embedding_both', type=str,
                        help="Input filename for both embedding")
    parser.add_argument('--output-embedding-src', dest='output_embedding_src',
                        type=str, help="Output filename for src embedding")
    parser.add_argument('--output-embedding-dst', dest='output_embedding_dst',
                        type=str, help="Output filename for dst embedding")
    parser.add_argument('--output-embedding-both', dest='output_embedding_both',
                        type=str, help="Output filename for both embedding")
    parser.add_argument('--compiled-model', dest='compiled_model', type=str,
                        help="Input filename for compiled model")
    parser.add_argument('--output-compiled-model', dest='output_compiled_model',
                        type=str, help="Output filename for compiled model")
    parser.add_argument('--fitted-model', dest='fitted_model', type=str,
                        help="Input filename for fitted model")
    parser.add_argument('--output-fitted-model', dest='output_fitted_model',
                        type=str, help="Output filename for fitted model")
    parser.add_argument('--model-weights', dest='model_weights', type=str,
                        help="Input filename for model weights")
    parser.add_argument('--output-model-weights', dest='output_model_weights',
                        type=str, help="Output filename for model weights")
    parser.add_argument('--output-snapshot-prefix', dest='output_snapshot_prefix',
                        type=str, help="Output prefix for snapshots")

    args = parser.parse_args()

    # handle 'both' arguments here: each --X-both value is copied into the
    # corresponding src and dst (or encoder/decoder) settings
    if args.train_both is not None:
        args.train_src = args.train_both
        args.train_dst = args.train_both
    if args.validation_both is not None:
        args.validation_src = args.validation_both
        args.validation_dst = args.validation_both
    if args.test_both is not None:
        args.test_src = args.test_both
        args.test_dst = args.test_both
    if args.embedding_both is not None:
        args.embedding_src = args.embedding_both
        args.embedding_dst = args.embedding_both
    if args.output_embedding_both is not None:
        args.output_embedding_src = args.output_embedding_both
        args.output_embedding_dst = args.output_embedding_both
    if args.lr_both is not None:
        args.lr_encoder = args.lr_both
        args.lr_decoder = args.lr_both

    # handle logs — log_to_file here takes (channel, path), unlike the
    # (message, file) signature used elsewhere in this file; different module,
    # presumably — TODO confirm which log_to_file is in scope
    if args.log_glob is not None:
        log_to_file('glob', args.log_glob)
    if args.log_info is not None:
        log_to_file('info', args.log_info)
    if args.log_stat is not None:
        log_to_file('stat', args.log_stat)

    log("Loaded arguments")
    print args

    commands = map(str.lower, args.commands)
    # cache is seeded with the command list; presumably shared state for the
    # command handlers invoked later — TODO confirm against the caller
    cache = collections.defaultdict(lambda: None, {'commands': commands})

    # check that all commands are valid before executing
    for command in commands:
        if command not in map(lambda (name, handler): name, valid_commands):
            log("Parsed invalid command {0}: exiting".format(command))
            exit()
def subscribe_phase3(self, subscriber_mail_id, events): broker_ip = BrokerManager().get_random_broker() msg = message_utility.get_subscriber_msg(events, subscriber_mail_id) log.log_to_file(broker_ip) Messenger().send_message(msg, broker_ip)
def main():
    """Parse CLI arguments for the RNN encoder-decoder and validate the commands.

    NOTE(review): this definition appears to be a token-for-token duplicate of
    an earlier main() in this file (only formatting differs) — confirm and
    deduplicate.  It is Python 2 code (`print args` and the tuple-unpacking
    `lambda (name, handler)` below are Python-2-only syntax).
    """
    # mapping of commands to handlers
    valid_commands = [
        ('create', h_create),
        ('compile', h_compile),
        ('train', h_train),
        ('test', h_test),
        ('export', h_export),
        ('interactive', h_interactive),
    ]

    # create parser; '@file' on the command line reads additional args from a file
    parser = argparse.ArgumentParser(description="RNN Encoder Decoder",
                                     fromfile_prefix_chars='@')

    # global
    helpstr = "List of commands: " + ', '.join(
        [name for name, handler in valid_commands])
    parser.add_argument('commands', type=str, nargs='+', help=helpstr)

    # data
    parser.add_argument('--train-src', dest='train_src', type=str,
                        help="Training sentences for source (encoder) network")
    parser.add_argument(
        '--train-dst', dest='train_dst', type=str,
        help="Training sentences for destination (decoder) network")
    parser.add_argument(
        '--train-both', dest='train_both', type=str,
        help="Training sentences for both encoder and decoder network")
    parser.add_argument(
        '--validation-src', dest='validation_src', type=str,
        help="Validation sentences for source (encoder) network")
    parser.add_argument(
        '--validation-dst', dest='validation_dst', type=str,
        help="Validation sentences for destination (decoder) network")
    parser.add_argument(
        '--validation-both', dest='validation_both', type=str,
        help="Test sentences for both encoder and decoder network")
    parser.add_argument('--test-src', dest='test_src', type=str,
                        help="Test sentences for source (encoder) network")
    parser.add_argument(
        '--test-dst', dest='test_dst', type=str,
        help="Test sentences for destination (decoder) network")
    parser.add_argument(
        '--test-both', dest='test_both', type=str,
        help="Test sentences for both encoder and decoder network")

    # compile parameters
    parser.add_argument('--embedding-size', dest='embedding_size', type=int,
                        help="Embedding vector size")
    parser.add_argument('--layers', dest='layer_count', type=int,
                        help="Network layer count")
    parser.add_argument('--max-sentence-length', dest='maxlen', type=int,
                        help="Maximum sentence length")
    parser.add_argument(
        '--optimizer', dest='optimizer', type=str, default='adagrad',
        help="Optimizer type (rmsprop, sgd, adadelta, adagrad)")
    parser.add_argument('--compile-train', dest='compile_train', type=str,
                        default='True',
                        help="Compile training functions for model")

    # train parameters
    parser.add_argument('--batch-size', dest='batch_size', type=int,
                        default=16, help="Training batch size")
    parser.add_argument(
        '--validation-skip', dest='validation_skip', type=float, default=10,
        help=
        "Amount of epochs to skip before outputting validation translations")
    parser.add_argument('--snapshot-skip', dest='snapshot_skip', type=float,
                        default=10,
                        help="Amount of epochs to skip between snapshots")
    parser.add_argument('--lr-encoder', dest='lr_encoder', type=float,
                        default=None, help="Learning rate for encoder")
    parser.add_argument('--lr-decoder', dest='lr_decoder', type=float,
                        default=None, help="Learning rate for decoder")
    parser.add_argument('--lr-both', dest='lr_both', type=float,
                        default=None, help="Learning rate for both")
    parser.add_argument('--epoch-start', dest='epoch_start', type=int,
                        default=0, help="Starting epoch")
    parser.add_argument(
        '--sample-size', dest='sample_size', type=int, default=200,
        help=
        "Sample size for validation loss/test+validation statistics (if <= 0: use the entire sets)"
    )
    parser.add_argument('--show-multiple', dest='show_multiple', type=str,
                        default='false', help="Show top-N for each translation")

    # training thresholds
    parser.add_argument('--epochs', dest='epochs', type=int, default=None,
                        help="Cutoff for training (number of epochs)")
    parser.add_argument('--error', dest='error', type=float, default=None,
                        help="Cutoff for training (test and validation error)")
    parser.add_argument('--seconds', dest='seconds', type=float, default=None,
                        help="Cutoff for training (total seconds elapsed)")
    parser.add_argument('--loss', dest='loss', type=float, default=None,
                        help="Cutoff for training (test and validation loss)")

    # test parameters
    parser.add_argument(
        '--format', dest='test_format', type=str, default='',
        help="Test output format (options: pairs (default), simple, complex)")

    # logging
    parser.add_argument('--log-global', dest='log_glob', type=str,
                        help="Log file for all output")
    parser.add_argument('--log-info', dest='log_info', type=str,
                        help="Log file for updates (no data dumps)")
    parser.add_argument('--log-stat', dest='log_stat', type=str,
                        help="Log file for stats (validation accuracy, etc)")

    # models
    parser.add_argument('--embedding-src', dest='embedding_src', type=str,
                        help="Input filename for src embedding")
    parser.add_argument('--embedding-dst', dest='embedding_dst', type=str,
                        help="Input filename for dst embedding")
    parser.add_argument('--embedding-both', dest='embedding_both', type=str,
                        help="Input filename for both embedding")
    parser.add_argument('--output-embedding-src', dest='output_embedding_src',
                        type=str, help="Output filename for src embedding")
    parser.add_argument('--output-embedding-dst', dest='output_embedding_dst',
                        type=str, help="Output filename for dst embedding")
    parser.add_argument('--output-embedding-both', dest='output_embedding_both',
                        type=str, help="Output filename for both embedding")
    parser.add_argument('--compiled-model', dest='compiled_model', type=str,
                        help="Input filename for compiled model")
    parser.add_argument('--output-compiled-model', dest='output_compiled_model',
                        type=str, help="Output filename for compiled model")
    parser.add_argument('--fitted-model', dest='fitted_model', type=str,
                        help="Input filename for fitted model")
    parser.add_argument('--output-fitted-model', dest='output_fitted_model',
                        type=str, help="Output filename for fitted model")
    parser.add_argument('--model-weights', dest='model_weights', type=str,
                        help="Input filename for model weights")
    parser.add_argument('--output-model-weights', dest='output_model_weights',
                        type=str, help="Output filename for model weights")
    parser.add_argument('--output-snapshot-prefix', dest='output_snapshot_prefix',
                        type=str, help="Output prefix for snapshots")

    args = parser.parse_args()

    # handle 'both' arguments here: each --X-both value is copied into the
    # corresponding src and dst (or encoder/decoder) settings
    if args.train_both is not None:
        args.train_src = args.train_both
        args.train_dst = args.train_both
    if args.validation_both is not None:
        args.validation_src = args.validation_both
        args.validation_dst = args.validation_both
    if args.test_both is not None:
        args.test_src = args.test_both
        args.test_dst = args.test_both
    if args.embedding_both is not None:
        args.embedding_src = args.embedding_both
        args.embedding_dst = args.embedding_both
    if args.output_embedding_both is not None:
        args.output_embedding_src = args.output_embedding_both
        args.output_embedding_dst = args.output_embedding_both
    if args.lr_both is not None:
        args.lr_encoder = args.lr_both
        args.lr_decoder = args.lr_both

    # handle logs — log_to_file here takes (channel, path), unlike the
    # (message, file) signature used elsewhere in this file; different module,
    # presumably — TODO confirm which log_to_file is in scope
    if args.log_glob is not None:
        log_to_file('glob', args.log_glob)
    if args.log_info is not None:
        log_to_file('info', args.log_info)
    if args.log_stat is not None:
        log_to_file('stat', args.log_stat)

    log("Loaded arguments")
    print args

    commands = map(str.lower, args.commands)
    # cache is seeded with the command list; presumably shared state for the
    # command handlers invoked later — TODO confirm against the caller
    cache = collections.defaultdict(lambda: None, {'commands': commands})

    # check that all commands are valid before executing
    for command in commands:
        if command not in map(lambda (name, handler): name, valid_commands):
            log("Parsed invalid command {0}: exiting".format(command))
            exit()