Example #1
0
def find_match_status(elem):
    """Determine the MatchStatus of a flashscore match-row element.

    Live matches are detected via the 'event__match--live' CSS class,
    scheduled ones via the presence of an 'event__time' child element.
    Otherwise the textual stage block is mapped to a terminal status.
    Returns None (after logging) when the stage text is unrecognised.
    """
    if element_has_class(elem, "event__match--live"):
        return MatchStatus.Live

    try:
        elem.find_element_by_class_name("event__time")
    except NoSuchElementException:
        # No scheduled-time element: fall through to the stage text below.
        pass
    else:
        # Match is scheduled
        return MatchStatus.Scheduled

    status_str = elem.find_element_by_class_name("event__stage--block").text
    if status_str == "Finished":
        return MatchStatus.Finished
    if status_str == "Walkover":
        return MatchStatus.Walkover
    if status_str == "Cancelled":
        return MatchStatus.Cancelled
    if "retired" in status_str:
        return MatchStatus.Retired

    # Unknown stage text: log it and report no status.
    msg = "Status '{0}' Not Found".format(status_str)
    log_to_file(msg, MATCHES_ERROR_LOGS)
    log("status", msg)
    return None
Example #2
0
 def match_and_notify_subscriber(self, message, addr):
     """Publish *message* to local subscribers and forward it to
     neighbouring brokers, unless its id was already published
     (simple de-duplication via self.publish_history)."""
     if message.id in self.publish_history:
         log_to_file("already published")
         return
     self.publish.notify(message.event, message.message, BROKER_NAME)
     self.match_events_broker(message, addr)
     self.publish_history.append(message.id)
Example #3
0
def build_predictions():
    """Predict outcomes for every scheduled, not-yet-predicted match and
    persist the predictions back to the matches collection."""
    collection = get_matches_collection()

    pending_filter = {
        "status": "Scheduled",
        "prediction": {
            "$exists": False
        }
    }
    if collection.count_documents(pending_filter) == 0:
        # No new match to predict
        log_to_file("No new match to predict", PREDICT_LOGS)
        return

    # Load the pre-trained prediction pipeline from disk.
    prediction_pipeline = load("tennis_prediction.joblib")

    to_predict = q_get_unpredicted_matches()
    to_predict = to_predict.astype(get_match_dtypes(to_predict))

    predicted = get_predictions(to_predict, prediction_pipeline)

    # Attach the prediction columns next to the match attributes.
    combined = pd.concat([to_predict, predicted], axis=1)

    for match_json in get_embedded_matches_json(combined):
        q_update_match(match_json)
Example #4
0
 def match_events_broker(self, msg, addr):
     """Forward *msg* to every neighbouring broker except the sender."""
     messenger = Messenger()
     sender_ip = addr[0]
     #and addr[0] in self.neighbour.keys() and msg.event in self.neighbour[neighbour_ip]:
     for neighbour_ip in self.neighbour:
         if neighbour_ip != sender_ip:
             log_to_file(str(neighbour_ip) + "!=" + str(addr[0]))
             messenger.send_message(msg, neighbour_ip)
Example #5
0
def update_match(match):
    """Persist an updated match to the database, logging the outcome.

    Parameters
    ----------
    match : pd.Series
        Match attributes; must contain '_id'. 'match_id' is optional —
        the datetime-only update path in process_match_row builds a
        series holding just {'datetime', '_id'}.
    """
    # Identifier used in the error logs. Fall back to '_id' so the except
    # branch cannot raise its own KeyError when 'match_id' is absent
    # (previously it formatted match["match_id"] unconditionally).
    match_label = match.get("match_id", match.get("_id"))
    try:
        matches_json = get_embedded_matches_json(pd.DataFrame(match).T)
        q_update_match(matches_json[0])
        log_to_file("match '{0}' has been updated".format(match["_id"]), MATCHES_LOGS)
    except Exception as ex:
        log_to_file("match '{0}' couldn't be updated".format(match_label), MATCHES_ERROR_LOGS)
        log("scrap_match", "match '{0}' couldn't be updated".format(match_label), type(ex).__name__)
Example #6
0
def process_match_row(elem, matches_date):
    """Process a single flashscore match-row element.

    Creates, updates, or deletes the corresponding match document
    depending on the match's status on flashscore versus the database.

    Parameters
    ----------
    elem : selenium element
        The match row; its 'id' attribute encodes the flashscore match id.
    matches_date : date-like
        The day the row belongs to; used to rebuild a scheduled match's
        datetime when only HH:MM is shown on the page.
    """
    elem_id = elem.get_attribute("id")
    # Row ids look like 'x_y_<match_id>'; keep only the trailing part.
    match_id_regex = re.search("^._._(.*)$", elem_id)
    match_id = match_id_regex.group(1)

    match_status = find_match_status(elem)

    if match_status is None:
        msg = "Status not found for match '{0}'".format(match_id)
        log_to_file(msg, MATCHES_ERROR_LOGS)
        # Bug fix: previously passed MATCHES_ERROR_LOGS (the log-file path)
        # as the log message instead of msg (cf. find_match_status).
        log("status", msg)
        return

    match_found = q_find_match_by_id(match_id)

    if match_found is not None:
        # Match exists
        if MatchStatus[match_found["status"]] not in [MatchStatus.Finished, MatchStatus.Retired, MatchStatus.Awarded]:
            # Match is not recorded as 'finished' in database
            if match_status in [MatchStatus.Finished, MatchStatus.Retired, MatchStatus.Live, MatchStatus.Awarded]\
                    or (match_status == MatchStatus.Interrupted
                        and MatchStatus[match_found["status"]] != MatchStatus.Interrupted):
                # Match is finished or live: re-scrap and update.
                match = scrap_match_flashscore(match_id, match_status)
                if match is None:
                    # Scrap failed — consistent with the creation path below,
                    # which already guards against a None result.
                    return
                match["_id"] = match_found["_id"]
                update_match(match)

            elif match_status in [MatchStatus.Walkover, MatchStatus.Cancelled]:
                # Match has been canceled
                delete_match(match_found["_id"])
                print("Delete match '{0}'".format(match_id))

            elif match_status == MatchStatus.Scheduled:
                # Updating match datetime if changed
                time_elem = elem.find_element_by_class_name("event__time").text
                time_regex = re.search(r"(\d{2}):(\d{2})$", time_elem)
                hour = int(time_regex.group(1))
                minute = int(time_regex.group(2))
                match_date = datetime(matches_date.year, matches_date.month, matches_date.day, hour, minute)

                if match_found["datetime"] != match_date:
                    match_dict = {'datetime': match_date, '_id': match_found["_id"]}
                    update_match(pd.Series(match_dict))
    else:
        # Match doesn't exist in the database yet.
        if match_status not in [MatchStatus.Walkover, MatchStatus.Cancelled]:
            # Scrap match preview
            match = scrap_match_flashscore(match_id, match_status)

            if match is None:
                return

            create_match(match)
def create_tournament(tournament):
    """Insert a new tournament document; log the outcome either way."""
    created = q_create_tournament(tournament.to_dict())
    if created is None:
        msg = "couldn't create tournament '{0}'".format(tournament["flash_id"])
        log_to_file(msg, TOURNAMENT_LOGS)
        log("create_tournament", msg)
        return
    log_to_file(
        "tournament '{0}' has been created".format(tournament["flash_id"]),
        TOURNAMENT_LOGS)
Example #8
0
def create_match(match):
    """Insert a new match document built from a match series; any failure
    (including a falsy result from the insert query) is logged."""
    try:
        payload = get_embedded_matches_json(pd.DataFrame(match).T)[0]
        if not q_create_match(payload):
            # Treat a falsy insert result as a failure.
            raise Exception("Match not created")
        log_to_file("match '{0}' has been created".format(match["match_id"]), MATCHES_LOGS)
    except Exception as ex:
        log_to_file("match '{0}' couldn't be created".format(match["match_id"]), MATCHES_ERROR_LOGS)
        log("scrap_match", "match '{0}' couldn't be created".format(match["match_id"]), type(ex).__name__)
def update_tournament(tournament):
    """Update an existing tournament document; log success or failure."""
    try:
        payload = get_dataframe_json(pd.DataFrame(tournament).T)[0]
        q_update_tournament(payload)

        log_to_file(
            "tournament '{0}' has been updated".format(tournament["_id"]),
            TOURNAMENT_LOGS)
    except Exception as ex:
        msg = "tournament '{0}' couldn't be updated".format(
            tournament["flash_id"])
        log_to_file(msg, TOURNAMENT_LOGS)
        log("tournament_update", msg, type(ex).__name__)
def retrieve_player_rank_info(player_id, all_player_ranks=None):
    """Return (rank, rank_points) for *player_id*, or (None, None) when no
    single matching row exists (the miss is logged).

    When *all_player_ranks* is not supplied, the full ranking table is
    fetched via retrieve_all_player_ranks().
    """
    if all_player_ranks is None:
        all_player_ranks = retrieve_all_player_ranks()

    matched = all_player_ranks[all_player_ranks["player_id"] == player_id]

    if len(matched.index) != 1:
        msg = "Player rank info not found for player '{0}'".format(player_id)
        log_to_file(msg, RANKS_LOGS)
        log("player_rank", msg)
        return None, None

    row = matched.iloc[0]
    return row["rank"], row["rank_points"]
Example #11
0
    def notify_subscribers(self, recipient, message, broker_name, subject="Email alert from CDS"):
        """Email *message* to each address in *recipient* via Gmail SMTP.

        Any failure is reported to stdout instead of raised (best effort).
        """
        try:
            log_to_file(",".join(recipient)+" "+message)
            # Read this host's IP from /etc/hosts for the mail body.
            ip = subprocess.check_output(" awk 'END{print $1}' /etc/hosts ", encoding='utf-8', stderr=subprocess.STDOUT,
                                    shell=True)

            email_text = """Subject: %s\n%s \n Broker name - %s\n Broker IP - %s""" % (subject, message, broker_name, str(ip))

            server = smtplib.SMTP_SSL('smtp.gmail.com', 465)
            server.ehlo()
            server.login(SENDER, PASSWORD)
            server.sendmail(SENDER, recipient, email_text)
            server.close()

            print("Email sent to "+str(recipient), file=sys.stderr)
        except Exception as e:
            print('Something went wrong...', e)
Example #12
0
 def process_message(self, msg, addr):
     """Dispatch an incoming broker message on its action field."""
     log_to_file(str(msg.__dict__))
     action = msg.action
     if action == msg_util.SUBSCRIBE:
         self.add_subscriber(msg, addr)
     elif action == msg_util.PUBLISH:
         self.match_and_notify_subscriber(msg, addr)
     elif action == "show":
         # Debug action: dump the mail list registered for the event.
         mail_list = self.db.get_mail_list_for_event(msg.event, BROKER_NAME)
         log_to_file("".join(mail_list))
     elif action == "neighbours":
         log_to_file(str(self.neighbour))
     else:
         log_to_file("not identifies" + str(msg.__dict__))
Example #13
0
def scrap_player_id(player_name):
    """Search atptour.com for *player_name*.

    Returns (atptour_name, atptour_id); both are None when no result is
    found. A name with more than two tokens is retried with only the
    first and last token before giving up.
    """
    atptour_name = atptour_id = None
    driver = get_chrome_driver()
    search_url = 'https://www.atptour.com/en/-/ajax/playersearch/PlayerUrlSearch?searchTerm={}'.format(
        player_name)
    driver.get(search_url)
    time.sleep(1)

    # The endpoint returns JSON rendered inside a <pre> tag.
    raw_json = driver.find_element_by_tag_name("pre").get_attribute('innerHTML')
    items = json.loads(raw_json)["items"]

    if len(items) == 0:
        tokens = player_name.split()
        if len(tokens) > 2:
            # Retry with "First Last" only (drop middle names).
            driver.quit()
            return scrap_player_id(tokens[0] + " " + tokens[-1])
        msg = "'{0}' not found on atptour website".format(player_name)
        log_to_file(msg, PLAYER_LOGS)
        log("players", msg)
    else:
        # Prefer an exact (case-insensitive) name match, else first result.
        chosen = next(
            (item for item in items
             if str.lower(item["Key"]) == str.lower(player_name)),
            items[0])

        atptour_name = chosen["Key"]
        href_regex = re.search(".+/(.*)/overview$", chosen["Value"])
        atptour_id = href_regex.group(1)

    driver.quit()

    return atptour_name, atptour_id
Example #14
0
def add_player_info(match):
    """Add p1 and p2 attributes to a match series.

    Looks up both players in the database, scraping (and persisting) any
    that are missing. If either player still cannot be resolved, the
    failure is logged and *match* is left without player attributes.
    """
    p1 = find_player_by_id(match["p1_id"])
    if p1 is None:
        p1 = scrap_new_player(match["p1_id"], match["p1_url"])
        # Bug fix: only persist when scraping actually produced a player.
        # Previously create_player(None) was called on scrape failure.
        if p1 is not None:
            create_player(p1)

    p2 = find_player_by_id(match["p2_id"])
    if p2 is None:
        p2 = scrap_new_player(match["p2_id"], match["p2_url"])
        if p2 is not None:
            create_player(p2)

    if p1 is None or p2 is None:
        msg = "Couldn't find nor scrap players for match '{0}'".format(
            match["match_id"])
        log_to_file(msg, PLAYER_LOGS)
        log("players", msg)
        return

    # Copy the player attributes (and ranking info) onto the match series,
    # once per player, using the p1_/p2_ column prefix.
    for prefix, player in (("p1", p1), ("p2", p2)):
        match[prefix + "_hand"] = player["handedness"]
        match[prefix + "_backhand"] = player["backhand"]
        match[prefix + "_ht"] = player["height"]
        match[prefix + "_weight"] = player["weight"]
        match[prefix + "_age"] = calculate_age(player["birth_date"])
        match[prefix + "_rank"], match[prefix + "_rank_points"] = \
            retrieve_player_rank_info(player["atp_id"])
        match[prefix + "_birth_country"] = player["birth_country"]
        match[prefix + "_residence_country"] = player["residence_country"]
Example #15
0
def feature_engineer():
    """Compute engineered features for all matches lacking them and
    persist the results back to the matches collection."""
    collection = get_matches_collection()

    if collection.count_documents({"features": {"$exists": False}}) == 0:
        # No new match to build features
        log_to_file("No new match to build features", PREDICT_LOGS)
        return

    unfeatured = q_get_unfeatured_matches()
    unfeatured = unfeatured.astype(get_match_dtypes(unfeatured))

    history = q_get_past_matches()
    history = history.astype(get_match_dtypes(history))

    feature_frame = add_features(unfeatured, history)

    # Only the _id column is needed to address each updated document.
    combined = pd.concat([unfeatured[["_id"]], feature_frame], axis=1)

    for match_json in get_embedded_matches_json(combined):
        q_update_match(match_json)
Example #16
0
 def publish_event(self, event, message, table='events'):
     """Send a publish message for *event* to a randomly chosen broker."""
     log_to_file("publish"+event+table)
     target_ip = BrokerManager().get_random_broker()
     publish_msg = message_utility.get_publish_msg(event, message)
     Messenger().send_message(publish_msg, target_ip)
def search_all_tournaments_atptour():
    """Scrape the atptour.com tournaments page into a DataFrame.

    Returns a DataFrame with columns atp_id, atp_name, atp_formatted_name,
    city, country, start_date, end_date — or None when the page-level
    scrape fails entirely. Per-row failures append None placeholders so
    the parallel column lists stay aligned.
    """
    tournaments_atptour = None
    driver = get_chrome_driver()
    driver.get("https://www.atptour.com/en/tournaments")
    # Give the page time to render the tournament table.
    time.sleep(3)
    try:
        # Pass 1: name / formatted name / numeric id from each row's
        # overview link.
        atp_names = []
        atp_formatted_names = []
        atp_ids = []
        elements = driver.find_elements_by_xpath(
            "//tr[@class='tourney-result']/td[2]/a")

        for elem in elements:
            try:
                url = elem.get_attribute("href")
                url_regex = re.search("/tournaments/(.*)/(.*)/overview$", url)
                atp_formatted_name = url_regex.group(1)
                atp_id = int(url_regex.group(2))
                atp_name = elem.text

                atp_formatted_names.append(atp_formatted_name)
                atp_ids.append(atp_id)
                atp_names.append(atp_name)
            except Exception as ex:
                # Keep the parallel lists aligned with None placeholders.
                atp_formatted_names.append(None)
                atp_ids.append(None)
                atp_names.append(None)
                msg = "atp tournaments retrieval error, tournament '{0}'".format(
                    elem.text)
                log_to_file(msg, TOURNAMENT_LOGS)
                log("tournaments", msg, type(ex).__name__)

        # Pass 2: location text split as "<city>, <country>" (country is
        # the last comma-separated segment).
        cities = []
        countries = []
        elements = driver.find_elements_by_xpath(
            "//tr[@class='tourney-result']/td[2]/span[1]")

        for elem in elements:
            location = elem.text
            try:
                matched_location = location.split(", ")
                city = matched_location[0]
                country = matched_location[-1]

                cities.append(city)
                countries.append(country)
            except Exception as ex:
                cities.append(None)
                countries.append(None)
                msg = "atp tournaments retrieval error, location '{0}'".format(
                    location)
                log_to_file(msg, TOURNAMENT_LOGS)
                log("tournaments", msg, type(ex).__name__)

        # Pass 3: date range "YYYY.MM.DD - YYYY.MM.DD". The end date gets
        # +1 day — presumably so the stored range covers the final day;
        # TODO(review): confirm the intent.
        start_dates = []
        end_dates = []
        elements = driver.find_elements_by_xpath(
            "//tr[@class='tourney-result']/td[2]/span[2]")

        for elem in elements:
            date_elem = elem.text
            try:
                date_regex = re.search("^(.*) - (.*)$", date_elem)
                start_date_str = date_regex.group(1)
                start_date = datetime.strptime(start_date_str, '%Y.%m.%d')

                end_date_str = date_regex.group(2)
                end_date = datetime.strptime(end_date_str, '%Y.%m.%d')
                end_date += timedelta(days=1)

                start_dates.append(start_date)
                end_dates.append(end_date)
            except Exception as ex:
                # Date parse failures are silently tolerated (see the
                # commented-out diagnostics below).
                start_dates.append(None)
                end_dates.append(None)
                #print(type(ex).__name__)
                #print("atp tournaments retrieval error, date_elem: '{0}'".format(date_elem))

        tournaments_atptour = pd.DataFrame({
            "atp_id": atp_ids,
            "atp_name": atp_names,
            "atp_formatted_name": atp_formatted_names,
            "city": cities,
            "country": countries,
            "start_date": start_dates,
            "end_date": end_dates
        })

    except Exception as ex:
        # Page-level failure: return None after logging.
        msg = "Tournament header retrieval error"
        log_to_file(msg, TOURNAMENT_LOGS)
        log("tournaments", msg, type(ex).__name__)

    driver.quit()
    return tournaments_atptour
def scrap_all_player_ranks(log_file_path, pickle_db_path):
    """Scrape the full ATP singles ranking (ranks 1-5000) and record it.

    Skips quietly (via the internal ValueError) when atptour's last
    ranking date is not today's date. On success the ranking is stored
    through record_all_player_ranks() and the scrape date is remembered
    in the pickledb at *pickle_db_path*.
    """
    driver = get_chrome_driver()
    try:
        driver.get("https://www.atptour.com/en/rankings/singles")

        # The rank-date dropdown shows the latest ranking date as text.
        date_str = driver.find_element_by_xpath(
            "//div[@class='dropdown-wrapper']/div[1]/div/div").text

        last_ranking_date = datetime.strptime(date_str, '%Y.%m.%d').date()
        today = date.today()

        if last_ranking_date != today:
            # Check if last ranking date on atptour match current date. If not, do not scrap
            # (ValueError is used as a control-flow signal, caught below).
            raise ValueError()

        driver = get_chrome_driver(driver)
        driver.get(
            "https://www.atptour.com/en/rankings/singles?rankDate={0}&rankRange=1-5000"
            .format(date_str.replace(".", "-")))

        ranks = []
        rank_elems = driver.find_elements_by_class_name("rank-cell")
        for rank_elem in rank_elems:
            rank_str = rank_elem.text
            # Some low-level players has rank suffixed with T because they are ex-aequo
            rank_str = rank_str.replace("T", "")
            rank = int(rank_str)
            ranks.append(rank)

        # Points are rendered with thousands separators, e.g. "1,234".
        points_elems = driver.find_elements_by_xpath(
            "//td[@class='points-cell']/a")
        rank_points = [points.text for points in points_elems]
        rank_points = [int(points.replace(",", "")) for points in rank_points]

        # Player ids are the slug before '/overview' in each player URL.
        player_ids = []
        player_elems = driver.find_elements_by_xpath(
            "//td[@class='player-cell']/span[1]/a[1]")
        for elem in player_elems:
            href = elem.get_attribute("href")
            player_id_regex = re.search("players/.*/(.*)/overview", href)
            player_ids.append(player_id_regex.group(1))

        player_ranks = pd.DataFrame({
            "rank": ranks,
            "player_id": player_ids,
            "rank_points": rank_points
        })

        if record_all_player_ranks(player_ranks):
            log_to_file("Player ranks successfully updated", log_file_path)
            db = pickledb.load(pickle_db_path, True)
            db.set("update_player_ranks_date", date_str)
        else:
            raise Exception('Player ranks not recorded')

    except ValueError:
        # Ranking not refreshed on atptour yet (or date parse failed):
        # nothing to do.
        # log_to_file("Player ranks not updated on atptour", log_file_path)
        pass
    except Exception as ex:
        log_to_file("player_ranks update error", log_file_path)
        log("Player_ranks", str(ex))
        pass

    driver.quit()
Example #19
0
import os
import message_utility as msg_util
from publish import Publish
from db import DB
import subprocess
from msg_sender import Messenger
from log import log_to_file

NEIGHBORS = os.environ["NEIGHBORS"]
BROKER_NAME = os.environ['BROKER_NAME']
log_to_file("Broker ID" + BROKER_NAME)


class Broker:
    """
    Broker performs tasks as accept subscribe and publish request.  
    """
    def __init__(self):
        self.publish = Publish()
        self.db = DB()
        self.db.create_table_if_not_exists(BROKER_NAME)
        self.neighbour = {}
        self.publish_history = []
        self.set_network_table_list()

    # ------------ network table -----------------------------------
    def set_network_table_list(self, file_name="neighbours.txt"):
        self.neighbour = {}
        """
        with open(os.path.join('info',file_name), 'r') as f:
            for line in f.readlines():
Example #20
0
def get_tournament_from_row(driver, elem, matches_date):
    """Resolve the tournament a flashscore match row belongs to.

    Only ATP singles tournaments are handled; qualification rows, Davis
    Cup and other categories yield None. An existing tournament whose
    recorded start year differs from the current year is re-scraped and
    updated; unknown tournaments are scraped and created.
    """
    # Look for atp-singles tournaments only -> ignore others
    category = elem.find_element_by_class_name("event__title--type").text
    if category != "ATP - SINGLES":
        return None

    name = elem.find_element_by_class_name("event__title--name").text

    # Ignore qualification-stage rows.
    if re.search("Qualification", name):
        return None

    # Title looks like "<name> (<country>)".
    title_regex = re.search(r"^([^(]*) \(([^)]*)\)", name)
    tournament_name = title_regex.group(1)
    tournament_country = title_regex.group(2)

    existing = find_tournament_by_name(tournament_name)

    if existing is not None:
        if existing["start_date"].year == datetime.now().year:
            # Tournament exists and is up-to-date
            return existing
        # Stale record: re-scrap and persist the refreshed version.
        refreshed = scrap_tournament(existing, matches_date)
        if refreshed is not None:
            log_to_file("updating tournament {0}".format(refreshed["flash_id"]), TOURNAMENT_LOGS)
            update_tournament(refreshed)
        return refreshed

    # New tournament to be scrapped
    if tournament_name.startswith("Davis Cup"):
        return None

    # Look up the flashscore tournament id in the tournaments menu.
    flash_tournaments = get_flash_tournaments_from_menu(driver)
    matched = flash_tournaments[flash_tournaments["name"] == tournament_name]

    if len(matched.index) != 1:
        msg = "Couldn't find flashscore tournament id for '{0}'".format(tournament_name)
        log_to_file(msg, TOURNAMENT_LOGS)
        log("tournaments", msg)
        return None

    scrapped = scrap_tournament(pd.Series(
        {"flash_id": matched.iloc[0]["flash_id"],
         "flash_name": tournament_name,
         "country": tournament_country
         }
    ), matches_date)

    if scrapped is None:
        return None

    create_tournament(scrapped)
    return scrapped
Example #21
0
def delete_match(_id):
    """Delete a match by its _id, logging when the deletion query fails."""
    if q_delete_match(_id) is not None:
        return
    failure_msg = "match '{0}' not deleted".format(_id)
    log_to_file(failure_msg, MATCHES_ERROR_LOGS)
    log("match_delete", failure_msg)
Example #22
0
def scrap_player(atp_id):
    """Scrape a player's profile page on atptour.com into a pd.Series.

    Returns None (after logging) when the overall scrape fails. Missing
    individual fields are tolerated and stored as None rather than
    failing the whole player.
    """
    driver = get_chrome_driver()
    match_url = 'https://www.atptour.com/en/players/player/{}/overview'.format(
        atp_id)
    driver.get(match_url)
    time.sleep(0.5)

    player = pd.Series(dtype='float64')
    try:
        player["first_name"] = driver.find_element_by_xpath(
            "//div[@class='player-profile-hero-name']/div[1]").text
        player["last_name"] = driver.find_element_by_xpath(
            "//div[@class='player-profile-hero-name']/div[2]").text

        # first_initial is the first character of first_name, when present.
        player["first_initial"] = player["first_name"][0] if player["first_name"] is not None \
                                                             and player["first_name"] != "" else None
        player["full_name"] = "{0} {1}".format(player["last_name"],
                                               player["first_initial"])

        # Birthday is rendered as "(YYYY.MM.DD)".
        birth_date = None
        try:
            birth_date_search = driver.find_element_by_xpath(
                "//span[@class='table-birthday']").text
            birth_regex = re.search(r"^\(([0-9]*)\.([0-9]*)\.([0-9]*)\)$",
                                    birth_date_search)
            birth_year = birth_regex.group(1)
            birth_month = birth_regex.group(2)
            birth_day = birth_regex.group(3)
            birth_date = datetime(int(birth_year), int(birth_month),
                                  int(birth_day))
        except Exception as exc:
            print("problem date")

        player["birth_date"] = birth_date

        turned_pro = None
        try:
            turned_pro_str = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-overflow']/div[2]/div[1]/table/tbody/tr[1]/td[2]/div/div[2]"
            ).text
            turned_pro = int(turned_pro_str)
        except (NoSuchElementException, ValueError):
            pass

        player["turned_pro"] = turned_pro

        weight = None
        try:
            weight_str = driver.find_element_by_xpath(
                "//span[@class='table-weight-lbs']").text
            weight = int(weight_str)
        except (NoSuchElementException, ValueError):
            pass

        # Height is rendered as "(NNNcm)".
        height = None
        try:
            height_str = driver.find_element_by_xpath(
                "//span[@class='table-height-cm-wrapper']").text
            height_regex = re.search(r"^\(([0-9]*)cm\)$", height_str)
            if height_regex:
                height = int(height_regex.group(1))
        except (NoSuchElementException, ValueError, TypeError):
            pass

        player["weight"] = weight
        player["height"] = height

        flag_code = driver.find_element_by_xpath(
            "//div[@class='player-flag-code']").text
        player["flag_code"] = flag_code

        # Birth place is split as "<city>, <country>"; an unparseable or
        # unknown country is escalated as NoSuchElementException so the
        # flag-code fallback below kicks in.
        birth_city = birth_country = None
        try:
            birth_place = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-overflow']/div[2]/div["
                "1]/table/tbody/tr[2]/td[1]/div/div[2]").text
            b_matched_location = birth_place.split(", ")
            if len(b_matched_location) > 1:
                birth_city = b_matched_location[0]
                birth_country = b_matched_location[-1]

                if not country_exists(birth_country):
                    raise NoSuchElementException("birth_country_not_found")
            else:
                raise NoSuchElementException("birth_country_not_found")

        except NoSuchElementException:
            # NOTE(review): this 'pass' is a no-op — the fallback below
            # still executes as part of the except block.
            pass
            # Couldn't find player birth place, Setting birth_country with flag_code
            birth_country = find_country_with_flag_code(flag_code)
            if birth_country is None:
                msg = "Couldn't find birth country for player '{0}'".format(
                    atp_id)
                log_to_file(msg, PLAYER_LOGS)
                log("players", msg)

        player["birth_city"] = birth_city
        player["birth_country"] = birth_country

        residence_city = residence_country = None
        try:
            residence = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-overflow']/div[2]/div["
                "1]/table/tbody/tr[2]/td[2]/div/div[2]").text

            r_matched_location = residence.split(", ")
            if len(r_matched_location) > 1:
                residence_city = r_matched_location[0]
                residence_country = r_matched_location[-1]
        except NoSuchElementException:
            pass

        player["residence_city"] = residence_city
        player["residence_country"] = residence_country

        # The cell text is split as "<handedness>, <backhand>".
        handedness = backhand = None
        try:
            hands = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-overflow']/div[2]/div["
                "1]/table/tbody/tr[2]/td[3]/div/div[2]").text
            hands_matched = hands.split(", ")
            if len(hands_matched) > 1:
                handedness = hands_matched[0]
                backhand = hands_matched[-1]
        except NoSuchElementException:
            pass

        player["handedness"] = handedness
        player["backhand"] = backhand

    except Exception as ex:
        # Any unexpected failure aborts the whole player.
        player = None
        msg = "Couldn't scrap player : atp_id= '{}'".format(atp_id)
        log_to_file(msg, PLAYER_LOGS)
        log("players", msg)
        print(type(ex))

    driver.quit()

    return player
Example #23
0
# Pattern shared by every flashscore stat formatted as "NN% (won/total)".
_PCT_FRACTION_RE = re.compile(r"([0-9]+)% \(([0-9]+)/([0-9]+)")

# (flashscore stat label, total field, won field, ratio field) for the
# ratio-style stats scraped per player from the Statistics tab.
# "Service Games Won" is handled separately to preserve field ordering.
_RATIO_STATS = [
    ("Service Points Won", "svpt", "svpt_won", "svpt_ratio"),
    ("1st Serve Points Won", "1st_in", "1st_won", "1st_won_ratio"),
    ("2nd Serve Points Won", "2nd_pts", "2nd_won", "2nd_won_ratio"),
    ("Break Points Saved", "bp_faced", "bp_saved", "bp_saved_ratio"),
]


def _parse_ratio_stat(stats_dataframe, label, player):
    """Return (total, won, ratio) parsed from a "NN% (won/total)" stat cell.

    ``player`` is the column name ("p1" or "p2") of ``stats_dataframe``.
    Raises if the label is absent or the cell does not match the pattern
    (caught by the caller's outer handler).
    """
    cell = stats_dataframe[stats_dataframe["label"] == label].iloc[0][player]
    matched = _PCT_FRACTION_RE.search(cell)
    return int(matched.group(3)), int(matched.group(2)), int(matched.group(1)) / 100


def _scrap_match_datetime(driver, match_id):
    """Parse the match header date ("dd.mm.yyyy hh:mm") into a UTC Timestamp.

    Logs and re-raises on any failure so the caller's handler aborts the
    scrape with the original exception type intact.
    """
    try:
        raw = driver.find_element_by_xpath("//div[@id='detail']/div[4]/div[1]").text
        parts = re.search(r"^([0-9]+)\.([0-9]+)\.([0-9]+) ([0-9]+):([0-9]+)$", raw)
        day = int(parts.group(1))
        month = int(parts.group(2))
        year = int(parts.group(3))
        hour = int(parts.group(4))
        minute = int(parts.group(5))
        return pd.to_datetime("{0} {1} {2} {3} {4}".format(year, month, day, hour, minute)
                              , format='%Y %m %d %H %M', utc=True)
    except Exception as ex:
        msg = "Error with date format - scraping match '{}'".format(match_id)
        log_to_file(msg, MATCHES_ERROR_LOGS)
        log("scrap_match", msg, type(ex).__name__)
        # BUG FIX: was `raise Exception`, which masked the real exception
        # type in the caller's log entry; re-raise the original instead.
        raise


def scrap_match_flashscore(match_id, status):
    """Scrape one match page on flashscore.com into a pandas Series.

    Parameters
    ----------
    match_id : str
        Flashscore match identifier, appended to the match URL.
    status : MatchStatus
        Known status of the match; scores and statistics are only scraped
        for statuses where a score sheet exists.

    Returns
    -------
    pandas.Series or None
        The scraped match, or None when any scraping step failed (the
        failure is logged to MATCHES_ERROR_LOGS).
    """
    match = pd.Series([match_id], index=["match_id"])
    driver = get_chrome_driver()

    try:
        match["match_id"] = match_id
        match_url = "https://www.flashscore.com/match/" + match_id
        driver.get(match_url)
        time.sleep(1)  # let the dynamically rendered page settle

        tournament_elem = driver.find_element_by_xpath(
            "//div[contains(@class, 'tournamentHeaderDescription')]/div[1]/span[3]/a"
        )

        tournament_regex = re.search("atp-singles/(.*)/", tournament_elem.get_attribute("href"))
        match["tournament_id"] = tournament_regex.group(1)
        add_tournament_info(match)

        # The round name follows the last " - " of the header text; group
        # matches carry no round suffix.
        round_regex = re.search(",.*- (.*)$", tournament_elem.text)
        if round_regex:
            match["round"] = round_regex.group(1)
        else:
            match["round"] = "Group"

        match["p1_id"], match["p1_url"], match["p2_id"], match["p2_url"] = scrap_player_ids(driver)
        add_player_info(match)
        # BUG FIX: Series.drop makes no change when given the `columns`
        # keyword (per pandas docs), so the temporary URL fields used to
        # leak into the result; drop them by label instead.
        match.drop(labels=["p1_url", "p2_url"], inplace=True)

        match["datetime"] = _scrap_match_datetime(driver, match_id)

        '''
        Section usefull for scrap_tournament_matches()
        
        if status is None:
            status_elem = driver.find_element_by_xpath("//div[@id='detail']/div[4]/div[3]/div[1]/div[2]/span[1]").text
            if status_elem == "Finished":
                status = MatchStatus.Finished
            else:
                retired_regex = re.search("retired", status_elem)
                if retired_regex:
                    status = MatchStatus.Retired
                else:
                    msg = "status_error - match '{}'".format(match_id)
                    log_to_file(msg, MATCHES_ERROR_LOGS)
                    log("scrap_match", msg)
                    driver.quit()
                    return None
        '''

        match["status"] = status.name

        if status in [MatchStatus.Finished, MatchStatus.Retired, MatchStatus.Live, MatchStatus.Awarded,
                      MatchStatus.Interrupted]:

            if status != MatchStatus.Live:
                # Set match winner only if match has already finished: the
                # winner's name is the one wrapped in <strong>.
                participant_elems = driver.find_elements_by_xpath("//a[starts-with(@class, 'participantName___')]")
                match["p1_wins"] = len(participant_elems[-1].find_elements_by_xpath("strong")) != 1

            duration_elem = driver.find_element_by_xpath("//div[contains(@class, 'time--overall')]").text
            duration_regex = re.search("([0-9]+):([0-9]+)", duration_elem)
            match["minutes"] = int(duration_regex.group(1)) * 60 + int(duration_regex.group(2))

            # Per-set games and tie-break scores for both players, sets 1-5.
            for player_no in (1, 2):
                for set_no in range(1, 6):
                    (match["p{0}_s{1}_gms".format(player_no, set_no)],
                     match["p{0}_tb{1}_score".format(player_no, set_no)]) = find_gms_value(player_no, set_no, driver)

            driver.find_element_by_link_text("Statistics").click()
            time.sleep(0.5)  # wait for the Statistics tab content

            row_elements = driver.find_elements_by_xpath("//div[starts-with(@class, 'statRow___')]")

            stat_labels = []
            p1_stats = []
            p2_stats = []
            for row_elem in row_elements:
                stat_labels.append(row_elem.find_element_by_xpath("div[1]/div[2]").text)
                p1_stats.append(row_elem.find_element_by_xpath("div[1]/div[1]").text)
                p2_stats.append(row_elem.find_element_by_xpath("div[1]/div[3]").text)

            stats_dataframe = pd.DataFrame({"label": stat_labels, "p1": p1_stats, "p2": p2_stats})

            # Counting stats plus the four "NN% (won/total)" ratio stats,
            # p1 first then p2 (same field ordering as before).
            for player in ("p1", "p2"):
                match[player + "_ace"] = int(stats_dataframe[stats_dataframe["label"] == "Aces"].iloc[0][player])
                match[player + "_df"] = int(stats_dataframe[stats_dataframe["label"] == "Double Faults"].iloc[0][player])
                for label, total_key, won_key, ratio_key in _RATIO_STATS:
                    total, won, ratio = _parse_ratio_stat(stats_dataframe, label, player)
                    match["{0}_{1}".format(player, total_key)] = total
                    match["{0}_{1}".format(player, won_key)] = won
                    match["{0}_{1}".format(player, ratio_key)] = ratio

            # Service games are appended after both players' other stats to
            # keep the original field order of the Series.
            for player in ("p1", "p2"):
                total, won, ratio = _parse_ratio_stat(stats_dataframe, "Service Games Won", player)
                match[player + "_sv_gms"] = total
                match[player + "_sv_gms_won"] = won
                match[player + "_sv_gms_won_ratio"] = ratio

            match["p1_1st_serve_ratio"] = match["p1_1st_in"] / match["p1_svpt"] if match["p1_svpt"] > 0 else None
            match["p2_1st_serve_ratio"] = match["p2_1st_in"] / match["p2_svpt"] if match["p2_svpt"] > 0 else None

    except Exception as ex:
        msg = "Error while scraping match id '{}'".format(match_id)
        log_to_file(msg, MATCHES_ERROR_LOGS)
        log("scrap_match", msg, type(ex).__name__)
        match = None

    driver.quit()
    return match
def search_tournament_atptour(tournament, date_of_matches):
    """Resolve a flashscore tournament against its atptour.com references.

    For a tournament that already carries atptour references, verify them
    and repair `atp_id` / `atp_formatted_name` when atptour changed them.
    For a brand-new tournament, look the references up by name, falling
    back to a date-window + country search.

    Returns the (possibly updated) tournament Series, or None when no
    unambiguous match exists on atptour (the miss is logged).
    """
    flash_id = tournament["flash_id"]
    atp_tours = search_all_tournaments_atptour()

    has_refs = "atp_id" in tournament.index and "atp_formatted_name" in tournament.index

    if has_refs:
        atp_id = tournament["atp_id"]
        atp_formatted_name = tournament["atp_formatted_name"]

        # Case 1: both references still valid — nothing to update.
        exact = atp_tours[(atp_tours["atp_id"] == atp_id) &
                          (atp_tours["atp_formatted_name"] == atp_formatted_name)]
        if len(exact.index) == 1:
            return tournament

        # Case 2: the formatted name survived but atp_id changed.
        by_formatted = atp_tours[atp_tours["atp_formatted_name"] == atp_formatted_name]
        if len(by_formatted.index) == 1:
            new_atp_id = by_formatted.iloc[0]["atp_id"]
            log_to_file(
                "Tournament '{0}' changed atp_id from '{1}' to '{2}'".format(
                    flash_id, atp_id, new_atp_id), TOURNAMENT_LOGS)
            tournament["atp_id"] = new_atp_id
            return tournament

        # Case 3: both references changed — fall back to the plain name.
        by_name = atp_tours[atp_tours["atp_name"] ==
                            get_tournament_name(tournament["flash_name"])]
        if len(by_name.index) == 1:
            new_atp_id = by_name.iloc[0]["atp_id"]
            new_formatted_name = by_name.iloc[0]["atp_formatted_name"]
            log_to_file(
                "Tournament '{0}' changed atp_id from '{1}' to '{2}'".format(
                    flash_id, atp_id, new_atp_id), TOURNAMENT_LOGS)
            log_to_file(
                "Tournament '{0}' changed atp_formatted_name from '{1}' to '{2}'"
                .format(flash_id, atp_formatted_name,
                        new_formatted_name), TOURNAMENT_LOGS)
            tournament["atp_id"] = new_atp_id
            tournament["atp_formatted_name"] = new_formatted_name
            return tournament

        # Case 4: no unambiguous match anywhere — give up and log.
        msg = "Tournament '{0}' not found, atp_id: '{1}' and atp_formatted_name: '{2}'"\
            .format(flash_id, atp_id, atp_formatted_name)
        log_to_file(msg, TOURNAMENT_LOGS)
        log("tournament_not_found", msg)
        return None

    # Brand-new tournament: first try to match by name.
    name = get_tournament_name(tournament["flash_name"])
    candidates = atp_tours[atp_tours["atp_name"] == name]

    if len(candidates.index) != 1:
        # Name lookup failed or was ambiguous; try the date window plus
        # country instead.
        when = pd.Timestamp(date_of_matches)
        candidates = atp_tours[(atp_tours["start_date"] <= when)
                               & (atp_tours["end_date"] >= when)
                               & (atp_tours["country"] == tournament["country"])]

    if len(candidates.index) == 1:
        # References found — attach them to the tournament.
        tournament["atp_id"] = candidates.iloc[0]["atp_id"]
        tournament["atp_formatted_name"] = candidates.iloc[0]["atp_formatted_name"]
        return tournament

    msg = "Tournament '{0}' not found".format(flash_id)
    log_to_file(msg, TOURNAMENT_LOGS)
    log("tournament_not_found", msg)
    return None
Example #25
0
import socket
import pickle
import _thread
from broker import Broker
from log import log_to_file
"""
Listener demon: listens to message continuouly 
"""

s = socket.socket()
print("Socket successfully created")
port = 12345
s.bind(('', port))
s.listen(5)

broker = Broker()

log_to_file("broker started")

while True:

    # Establish connection with client.
    c, addr = s.accept()
    log_to_file('Got connection from' + str(addr))
    order = c.recv(1024)
    message = pickle.loads(order)
    _thread.start_new_thread(broker.process_message, (message, addr))

# Close the connection with the client
c.close()
Example #26
0
def main():
    """Command-line entry point for the RNN encoder/decoder tool.

    Builds the argument parser, folds the '--*-both' shorthand options into
    their src/dst counterparts, wires up the requested log files, and
    validates every requested command against the handler table before any
    execution.

    NOTE(review): this is Python 2 code (`print args` statement and the
    tuple-unpacking lambda in the command check below); it will not run
    under Python 3.
    """
    # mapping of commands to handlers
    valid_commands = [
        ('create',      h_create),
        ('compile',     h_compile),
        ('train',       h_train),
        ('test',        h_test),
        ('export',      h_export),
        ('interactive', h_interactive),
    ]

    # create parser; '@file' arguments are expanded from that file
    parser = argparse.ArgumentParser(description="RNN Encoder Decoder",
            fromfile_prefix_chars='@')

    # global
    helpstr = "List of commands: " + ', '.join([name for name, handler in valid_commands])
    parser.add_argument('commands', type=str, nargs='+',
            help=helpstr)

    # data
    parser.add_argument('--train-src', dest='train_src', type=str,
            help="Training sentences for source (encoder) network")
    parser.add_argument('--train-dst', dest='train_dst', type=str,
            help="Training sentences for destination (decoder) network")
    parser.add_argument('--train-both', dest='train_both', type=str,
            help="Training sentences for both encoder and decoder network")
    parser.add_argument('--validation-src', dest='validation_src', type=str,
            help="Validation sentences for source (encoder) network")
    parser.add_argument('--validation-dst', dest='validation_dst', type=str,
            help="Validation sentences for destination (decoder) network")
    parser.add_argument('--validation-both', dest='validation_both', type=str,
            help="Test sentences for both encoder and decoder network")
    parser.add_argument('--test-src', dest='test_src', type=str,
            help="Test sentences for source (encoder) network")
    parser.add_argument('--test-dst', dest='test_dst', type=str,
            help="Test sentences for destination (decoder) network")
    parser.add_argument('--test-both', dest='test_both', type=str,
            help="Test sentences for both encoder and decoder network")

    # compile parameters
    parser.add_argument('--embedding-size', dest='embedding_size', type=int,
            help="Embedding vector size")
    parser.add_argument('--layers', dest='layer_count', type=int,
            help="Network layer count")
    parser.add_argument('--max-sentence-length', dest='maxlen', type=int,
            help="Maximum sentence length")
    parser.add_argument('--optimizer', dest='optimizer', type=str,
            default='adagrad',
            help="Optimizer type (rmsprop, sgd, adadelta, adagrad)")
    parser.add_argument('--compile-train', dest='compile_train', type=str,
            default='True',
            help="Compile training functions for model")

    # train parameters
    parser.add_argument('--batch-size', dest='batch_size', type=int,
            default=16,
            help="Training batch size")
    parser.add_argument('--validation-skip', dest='validation_skip', type=float,
            default=10,
            help="Amount of epochs to skip before outputting validation translations")
    parser.add_argument('--snapshot-skip', dest='snapshot_skip', type=float,
            default=10,
            help="Amount of epochs to skip between snapshots")
    parser.add_argument('--lr-encoder', dest='lr_encoder', type=float,
            default=None,
            help="Learning rate for encoder")
    parser.add_argument('--lr-decoder', dest='lr_decoder', type=float,
            default=None,
            help="Learning rate for decoder")
    parser.add_argument('--lr-both', dest='lr_both', type=float,
            default=None,
            help="Learning rate for both")
    parser.add_argument('--epoch-start', dest='epoch_start', type=int,
            default=0,
            help="Starting epoch")
    parser.add_argument('--sample-size', dest='sample_size', type=int,
            default=200,
            help="Sample size for validation loss/test+validation statistics (if <= 0: use the entire sets)")
    parser.add_argument('--show-multiple', dest='show_multiple', type=str,
            default='false',
            help="Show top-N for each translation")

    # trianing thresholds
    parser.add_argument('--epochs', dest='epochs', type=int,
            default=None,
            help="Cutoff for training (number of epochs)")
    parser.add_argument('--error', dest='error', type=float,
            default=None,
            help="Cutoff for training (test and validation error)")
    parser.add_argument('--seconds', dest='seconds', type=float,
            default=None,
            help="Cutoff for training (total seconds elapsed)")
    parser.add_argument('--loss', dest='loss', type=float,
            default=None,
            help="Cutoff for training (test and validation loss)")

    # test parameters
    parser.add_argument('--format', dest='test_format', type=str,
            default='',
            help="Test output format (options: pairs (default), simple, complex)")

    # logging
    parser.add_argument('--log-global', dest='log_glob', type=str,
            help="Log file for all output")
    parser.add_argument('--log-info', dest='log_info', type=str,
            help="Log file for updates (no data dumps)")
    parser.add_argument('--log-stat', dest='log_stat', type=str,
            help="Log file for stats (validation accuracy, etc)")

    # models
    parser.add_argument('--embedding-src', dest='embedding_src', type=str,
            help="Input filename for src embedding")
    parser.add_argument('--embedding-dst', dest='embedding_dst', type=str,
            help="Input filename for dst embedding")
    parser.add_argument('--embedding-both', dest='embedding_both', type=str,
            help="Input filename for both embedding")
    parser.add_argument('--output-embedding-src', dest='output_embedding_src', type=str,
            help="Output filename for src embedding")
    parser.add_argument('--output-embedding-dst', dest='output_embedding_dst', type=str,
            help="Output filename for dst embedding")
    parser.add_argument('--output-embedding-both', dest='output_embedding_both', type=str,
            help="Output filename for both embedding")
    parser.add_argument('--compiled-model', dest='compiled_model', type=str,
            help="Input filename for compiled model")
    parser.add_argument('--output-compiled-model', dest='output_compiled_model', type=str,
            help="Output filename for compiled model")
    parser.add_argument('--fitted-model', dest='fitted_model', type=str,
            help="Input filename for fitted model")
    parser.add_argument('--output-fitted-model', dest='output_fitted_model', type=str,
            help="Output filename for fitted model")
    parser.add_argument('--model-weights', dest='model_weights', type=str,
            help="Input filename for model weights")
    parser.add_argument('--output-model-weights', dest='output_model_weights', type=str,
            help="Output filename for model weights")
    parser.add_argument('--output-snapshot-prefix', dest='output_snapshot_prefix', type=str,
            help="Output prefix for snapshots")


    args = parser.parse_args()

    # handle 'both' arguments here
    # a '--*-both' value overrides both its src and dst counterparts
    if args.train_both is not None:
        args.train_src = args.train_both
        args.train_dst = args.train_both
    if args.validation_both is not None:
        args.validation_src = args.validation_both
        args.validation_dst = args.validation_both
    if args.test_both is not None:
        args.test_src = args.test_both
        args.test_dst = args.test_both
    if args.embedding_both is not None:
        args.embedding_src = args.embedding_both
        args.embedding_dst = args.embedding_both
    if args.output_embedding_both is not None:
        args.output_embedding_src = args.output_embedding_both
        args.output_embedding_dst = args.output_embedding_both
    if args.lr_both is not None:
        args.lr_encoder = args.lr_both
        args.lr_decoder = args.lr_both

    # handle logs
    if args.log_glob is not None:
        log_to_file('glob', args.log_glob)
    if args.log_info is not None:
        log_to_file('info', args.log_info)
    if args.log_stat is not None:
        log_to_file('stat', args.log_stat)

    log("Loaded arguments")
    # Python 2 print statement: dump the parsed namespace for debugging.
    print args

    # Python 2 map returns a list, so `commands` can be iterated repeatedly.
    commands = map(str.lower, args.commands)

    cache = collections.defaultdict(lambda: None, {'commands': commands})

    # check that all commands are valid before executing
    for command in commands:
        # tuple-unpacking lambda below is Python 2-only syntax
        if command not in map(lambda (name, handler): name, valid_commands):
            log("Parsed invalid command {0}: exiting".format(command))
            exit()
Example #27
0
 def subscribe_phase3(self, subscriber_mail_id, events):
     """Subscribe *subscriber_mail_id* to *events* through a random broker."""
     # Pick a broker at random, build the subscription message, log the
     # chosen broker, then ship the message to it.
     target_broker = BrokerManager().get_random_broker()
     subscription = message_utility.get_subscriber_msg(events, subscriber_mail_id)
     log.log_to_file(target_broker)
     Messenger().send_message(subscription, target_broker)
Example #28
0
def main():
    # mapping of commands to handlers
    valid_commands = [
        ('create', h_create),
        ('compile', h_compile),
        ('train', h_train),
        ('test', h_test),
        ('export', h_export),
        ('interactive', h_interactive),
    ]

    # create parser
    parser = argparse.ArgumentParser(description="RNN Encoder Decoder",
                                     fromfile_prefix_chars='@')

    # global
    helpstr = "List of commands: " + ', '.join(
        [name for name, handler in valid_commands])
    parser.add_argument('commands', type=str, nargs='+', help=helpstr)

    # data
    parser.add_argument('--train-src',
                        dest='train_src',
                        type=str,
                        help="Training sentences for source (encoder) network")
    parser.add_argument(
        '--train-dst',
        dest='train_dst',
        type=str,
        help="Training sentences for destination (decoder) network")
    parser.add_argument(
        '--train-both',
        dest='train_both',
        type=str,
        help="Training sentences for both encoder and decoder network")
    parser.add_argument(
        '--validation-src',
        dest='validation_src',
        type=str,
        help="Validation sentences for source (encoder) network")
    parser.add_argument(
        '--validation-dst',
        dest='validation_dst',
        type=str,
        help="Validation sentences for destination (decoder) network")
    parser.add_argument(
        '--validation-both',
        dest='validation_both',
        type=str,
        help="Test sentences for both encoder and decoder network")
    parser.add_argument('--test-src',
                        dest='test_src',
                        type=str,
                        help="Test sentences for source (encoder) network")
    parser.add_argument(
        '--test-dst',
        dest='test_dst',
        type=str,
        help="Test sentences for destination (decoder) network")
    parser.add_argument(
        '--test-both',
        dest='test_both',
        type=str,
        help="Test sentences for both encoder and decoder network")

    # compile parameters
    parser.add_argument('--embedding-size',
                        dest='embedding_size',
                        type=int,
                        help="Embedding vector size")
    parser.add_argument('--layers',
                        dest='layer_count',
                        type=int,
                        help="Network layer count")
    parser.add_argument('--max-sentence-length',
                        dest='maxlen',
                        type=int,
                        help="Maximum sentence length")
    parser.add_argument(
        '--optimizer',
        dest='optimizer',
        type=str,
        default='adagrad',
        help="Optimizer type (rmsprop, sgd, adadelta, adagrad)")
    parser.add_argument('--compile-train',
                        dest='compile_train',
                        type=str,
                        default='True',
                        help="Compile training functions for model")

    # train parameters
    parser.add_argument('--batch-size',
                        dest='batch_size',
                        type=int,
                        default=16,
                        help="Training batch size")
    parser.add_argument(
        '--validation-skip',
        dest='validation_skip',
        type=float,
        default=10,
        help=
        "Amount of epochs to skip before outputting validation translations")
    parser.add_argument('--snapshot-skip',
                        dest='snapshot_skip',
                        type=float,
                        default=10,
                        help="Amount of epochs to skip between snapshots")
    parser.add_argument('--lr-encoder',
                        dest='lr_encoder',
                        type=float,
                        default=None,
                        help="Learning rate for encoder")
    parser.add_argument('--lr-decoder',
                        dest='lr_decoder',
                        type=float,
                        default=None,
                        help="Learning rate for decoder")
    parser.add_argument('--lr-both',
                        dest='lr_both',
                        type=float,
                        default=None,
                        help="Learning rate for both")
    parser.add_argument('--epoch-start',
                        dest='epoch_start',
                        type=int,
                        default=0,
                        help="Starting epoch")
    parser.add_argument(
        '--sample-size',
        dest='sample_size',
        type=int,
        default=200,
        help=
        "Sample size for validation loss/test+validation statistics (if <= 0: use the entire sets)"
    )
    parser.add_argument('--show-multiple',
                        dest='show_multiple',
                        type=str,
                        default='false',
                        help="Show top-N for each translation")

    # trianing thresholds
    parser.add_argument('--epochs',
                        dest='epochs',
                        type=int,
                        default=None,
                        help="Cutoff for training (number of epochs)")
    parser.add_argument('--error',
                        dest='error',
                        type=float,
                        default=None,
                        help="Cutoff for training (test and validation error)")
    parser.add_argument('--seconds',
                        dest='seconds',
                        type=float,
                        default=None,
                        help="Cutoff for training (total seconds elapsed)")
    parser.add_argument('--loss',
                        dest='loss',
                        type=float,
                        default=None,
                        help="Cutoff for training (test and validation loss)")

    # test parameters
    parser.add_argument(
        '--format',
        dest='test_format',
        type=str,
        default='',
        help="Test output format (options: pairs (default), simple, complex)")

    # logging
    parser.add_argument('--log-global',
                        dest='log_glob',
                        type=str,
                        help="Log file for all output")
    parser.add_argument('--log-info',
                        dest='log_info',
                        type=str,
                        help="Log file for updates (no data dumps)")
    parser.add_argument('--log-stat', dest='log_stat', type=str,
                        help="Log file for stats (validation accuracy, etc)")

    # models
    # Every model-related option is a plain string filename with an explicit
    # dest; register them from a (flag, dest, help) table so the triples stay
    # in one place and new options are one line to add.
    model_file_options = [
        ('--embedding-src', 'embedding_src',
         "Input filename for src embedding"),
        ('--embedding-dst', 'embedding_dst',
         "Input filename for dst embedding"),
        ('--embedding-both', 'embedding_both',
         "Input filename for both embedding"),
        ('--output-embedding-src', 'output_embedding_src',
         "Output filename for src embedding"),
        ('--output-embedding-dst', 'output_embedding_dst',
         "Output filename for dst embedding"),
        ('--output-embedding-both', 'output_embedding_both',
         "Output filename for both embedding"),
        ('--compiled-model', 'compiled_model',
         "Input filename for compiled model"),
        ('--output-compiled-model', 'output_compiled_model',
         "Output filename for compiled model"),
        ('--fitted-model', 'fitted_model',
         "Input filename for fitted model"),
        ('--output-fitted-model', 'output_fitted_model',
         "Output filename for fitted model"),
        ('--model-weights', 'model_weights',
         "Input filename for model weights"),
        ('--output-model-weights', 'output_model_weights',
         "Output filename for model weights"),
        ('--output-snapshot-prefix', 'output_snapshot_prefix',
         "Output prefix for snapshots"),
    ]
    for option_flag, option_dest, option_help in model_file_options:
        parser.add_argument(option_flag, dest=option_dest, type=str,
                            help=option_help)

    args = parser.parse_args()

    # Expand the '--*-both' shorthands: when one is supplied it overrides
    # both the src and dst (or encoder and decoder) variants of that option.
    both_expansions = [
        ('train_both', 'train_src', 'train_dst'),
        ('validation_both', 'validation_src', 'validation_dst'),
        ('test_both', 'test_src', 'test_dst'),
        ('embedding_both', 'embedding_src', 'embedding_dst'),
        ('output_embedding_both', 'output_embedding_src',
         'output_embedding_dst'),
        ('lr_both', 'lr_encoder', 'lr_decoder'),
    ]
    for both_name, first_name, second_name in both_expansions:
        both_value = getattr(args, both_name)
        if both_value is not None:
            setattr(args, first_name, both_value)
            setattr(args, second_name, both_value)

    # handle logs
    if args.log_glob is not None:
        log_to_file('glob', args.log_glob)
    if args.log_info is not None:
        log_to_file('info', args.log_info)
    if args.log_stat is not None:
        log_to_file('stat', args.log_stat)

    log("Loaded arguments")
    print args

    # Normalize command names to lowercase before dispatching.
    commands = map(str.lower, args.commands)

    # Shared scratch space for command handlers; missing keys read as None.
    cache = collections.defaultdict(lambda: None, {'commands': commands})

    # check that all commands are valid before executing
    # Build the set of valid command names ONCE: the original rebuilt the
    # name list (map + lambda) on every loop iteration and searched it
    # linearly, i.e. O(len(commands) * len(valid_commands)). A set gives
    # one pass plus O(1) membership tests, with identical results.
    valid_command_names = set(name for name, handler in valid_commands)
    for command in commands:
        if command not in valid_command_names:
            log("Parsed invalid command {0}: exiting".format(command))
            exit()