예제 #1
0
def get_bets_from_table(element, home, away):
    """Collect bets from a table whose rows all live in one ``<tbody>``.

    The first row is treated as a caption row when its first cell carries
    the ``n`` CSS class: its text (passed through ``remove_colon_and_dash``)
    becomes the bet type and the column names are read from the second row.
    Otherwise the bet type is ``None`` and the first row holds the column
    names. Every following row is one subtype: its first cell names the
    subtype and the remaining cells are paired with the column names.

    Args:
        element: Table tag exposing ``find``/``find_all`` (presumably a
            BeautifulSoup ``Tag``).
        home: Forwarded unchanged to ``handle_table_data``.
        away: Forwarded unchanged to ``handle_table_data``.

    Returns:
        The concatenation of whatever ``handle_table_data`` returns for each
        non-empty data row; an empty list when the body has no rows.
    """
    bets = []

    trs = element.find('tbody', recursive=False).find_all('tr',
                                                          recursive=False)
    if not trs:
        return bets

    first_td = trs[0].find('td', recursive=False)
    # ``get('class')`` yields None for a cell without a class attribute;
    # treat that the same as "not a caption row" instead of raising.
    first_td_classes = first_td.get('class') or []
    if 'n' in first_td_classes:
        type_ = remove_colon_and_dash(get_tag_text(first_td))
        names_row = 1
    else:
        type_ = None
        names_row = 0

    # The first cell of the names row sits above the subtype column.
    name_tds = trs[names_row].find_all('td', recursive=False)[1:]
    names = [get_tag_text(name_td) for name_td in name_tds]

    for tr in trs[names_row + 1:]:
        tds = tr.find_all('td', recursive=False)
        if not tds:
            continue
        subtype = remove_colon_and_dash(get_tag_text(tds[0]))

        table_data = [(names[j - 1], get_tag_text(tds[j]))
                      for j in range(1, len(tds))]
        bets += handle_table_data(table_data,
                                  type_=type_,
                                  subtype=subtype,
                                  home=home,
                                  away=away)

    return bets
예제 #2
0
def get_bets_from_line(element, home, away):
    """Parse bets written inline as ``<b>prefix</b> text`` runs.

    The descendants of ``element`` are walked in document order. The first
    descendant supplies the bet type; every later ``<b>`` tag starts a new
    block whose prefix is the tag text (with any special word split off by
    ``get_and_remove_special_word``), and the text nodes that follow are
    accumulated as that block's body. Each body is then split on ``;`` into
    ``name value`` parts, optionally preceded by a parenthesised handicap.

    Args:
        element: Container tag exposing ``descendants`` (presumably a
            BeautifulSoup ``Tag``).
        home: Unused here; kept so the signature matches the table parser.
        away: Unused here; kept so the signature matches the table parser.

    Returns:
        A list of ``[special_word, type_, prefix, name, handicap, value]``
        lists, one per parsed part.
    """
    bets = []

    bet_elements = list(element.descendants)
    type_ = remove_colon_and_dash(get_tag_text(bet_elements[0]))

    bet_blocks = []
    bet_block = ['', '', None]  # [prefix, accumulated text, special word]
    i = 2  # 0: <b>, 1: <b>.contents[0], 2: <b>.next_sibling
    while i < len(bet_elements):
        current = bet_elements[i]
        if current.name == 'b':
            bet_blocks.append(bet_block)

            prefix = remove_colon_and_dash(get_tag_text(current))
            special_word, prefix = get_and_remove_special_word(prefix)
            bet_block = [prefix, '', special_word]

            # Skip the <b> tag together with everything nested inside it.
            # Identity is required: bs4 nodes compare equal when their
            # markup matches, so ``!=`` would stop on the <b>'s own text
            # whenever it equals the text of the following sibling.
            end = current.next_sibling
            while i < len(bet_elements) and bet_elements[i] is not end:
                i += 1

        else:
            if isinstance(current, bs4.element.NavigableString):
                bet_block[1] += ' ' + str(current)
            i += 1

    bet_blocks.append(bet_block)

    for (prefix, bet_str, bet_special_word) in bet_blocks:
        bet_str = bet_str.strip()
        if not bet_str:
            continue

        # Optional leading '-' or ':', optional "(handicap)", then the rest.
        m1 = re.search(r'^(?:-|:)?\s*(?:\(\s*(.+?)\s*\)\s+)?(.+)$', bet_str)
        if m1 is None:
            continue
        handicap = float_safe(re.sub(
            r'\s*', '', m1.group(1))) if m1.group(1) is not None else None
        unhandicaped = m1.group(2)

        for part in re.split(r'\s*;', unhandicaped):
            part = part.strip()
            if not part:
                continue

            # Optional name (with an optional trailing ':'/'-'), then the
            # last whitespace-free token, which is taken as the value.
            m2 = re.search(r'^(?:(.+?)(?:\s*[:-])?\s+)?(\S+)$', part)
            if m2 is None:
                continue
            name = m2.group(1) if m2.group(1) is not None else ''

            value = float_safe(m2.group(2))

            bet = [bet_special_word, type_, prefix, name, handicap, value]
            bets.append(bet)

    return bets
예제 #3
0
def handle_bets(elements, home, away):
    """Extract bets from a sequence of page nodes.

    ``div`` nodes whose third child is a ``table`` are parsed with
    ``get_bets_from_table`` (applied to that table); other ``div`` nodes are
    parsed with ``get_bets_from_line``; ``table`` nodes are parsed with
    ``get_bets_from_table`` directly. Every other node is ignored.

    Args:
        elements: Iterable of nodes exposing ``name`` and, for ``div``
            nodes, ``contents`` (presumably BeautifulSoup nodes).
        home: Forwarded to the parsers.
        away: Forwarded to the parsers.

    Returns:
        All parsed bets whose sixth item (index 5) is not ``None``.
    """
    bets = []

    for element in elements:
        # Parsing is best-effort: a node that cannot be parsed (including
        # nodes without a ``name`` attribute) is skipped so that one
        # malformed fragment does not discard the rest.
        try:
            if element.name == 'div':
                contents = element.contents
                if len(contents) >= 3 and contents[2].name == 'table':
                    bets += get_bets_from_table(contents[2],
                                                home=home,
                                                away=away)
                else:
                    bets += get_bets_from_line(element, home=home, away=away)

            elif element.name == 'table':
                bets += get_bets_from_table(element, home=home, away=away)

        except Exception:
            continue

    return [bet for bet in bets if bet[5] is not None]
예제 #4
0
파일: parsing.py 프로젝트: kuraga/betrobot
def get_bets_from_table(element, home, away):
  """Collect bets from a table that has a ``<thead>`` and several ``<tbody>``s.

  The first header row gives the bet type (after ``remove_colon_and_dash``),
  the third header row gives the column names (its first cell is skipped),
  and the rows of the second ``<tbody>`` are the data rows. In each data row
  the first cell is the subtype and the remaining cells are paired with the
  column names before being handed to ``handle_table_data``.

  Returns the concatenated results of ``handle_table_data``.
  """
  header_rows = element.find('thead', recursive=False).find_all('tr', recursive=False)
  type_ = remove_colon_and_dash(get_tag_text(header_rows[0]))
  names = [
    get_tag_text(header_cell)
    for header_cell in header_rows[2].find_all('td', recursive=False)[1:]
  ]

  data_body = element.find_all('tbody', recursive=False)[1]

  collected = []
  for row in data_body.find_all('tr', recursive=False):
    cells = row.find_all('td', recursive=False)
    if not cells:
      continue

    subtype = remove_colon_and_dash(get_tag_text(cells[0]))
    # Column k of the data cells (after the subtype cell) maps to names[k].
    pairs = [
      (names[position], get_tag_text(cell))
      for position, cell in enumerate(cells[1:])
    ]
    collected += handle_table_data(pairs, type_=type_, subtype=subtype, home=home, away=away)

  return collected
예제 #5
0
파일: parsing.py 프로젝트: kuraga/betrobot
def _extract_player_names(table_tag):
    """Return the text of the second cell of each direct row of the table body.

    The first ``<tbody>`` found under ``table_tag`` is used; every one of its
    direct ``<tr>`` children must have at least two direct ``<td>`` cells.
    """
    body_rows = table_tag.find('tbody').find_all('tr', recursive=False)
    return [
        get_tag_text(row.find_all('td', recursive=False)[1])
        for row in body_rows
    ]
예제 #6
0
파일: parsing.py 프로젝트: kuraga/betrobot
def handle_date(html_or_file):
    """Parse a listing page into one tuple per match row.

    Every ``<table class="meeting-odds">`` is one tournament. Its
    ``tournament-name`` header holds either two links (country, tournament)
    or a single link (tournament only, country is ``None``). Each direct row
    of the table body yields
    ``(country, tournament, home, away, url, match_time_str)``.

    Args:
        html_or_file: Markup (or file object) handed to ``BeautifulSoup``
            with the ``lxml`` parser.

    Returns:
        List of the tuples described above, in document order.
    """
    soup = bs4.BeautifulSoup(html_or_file, 'lxml')

    items = []
    for table in soup.find_all('table', class_='meeting-odds'):
        header = table.find('th', class_='tournament-name', recursive=True)
        links = header.find_all('a', recursive=True)
        if len(links) > 1:
            intelbet_country = get_tag_text(links[0])
            intelbet_tournament = get_tag_text(links[1])
        else:
            intelbet_country = None
            intelbet_tournament = get_tag_text(links[0])

        for tr in table.find('tbody').find_all('tr', recursive=False):
            name_cell = tr.find('td', class_='name-with-icon', recursive=False)
            match_link = name_cell.find('a', recursive=False)

            # Home is the first inner span and away the third.
            team_spans = match_link.find('span', recursive=False).find_all('span', recursive=False)
            intelbet_home = get_tag_text(team_spans[0])
            intelbet_away = get_tag_text(team_spans[2])

            url = 'http:%s' % (match_link['href'],)

            time_cell = tr.find('td', class_='tiles-bet-time', recursive=False)
            match_time_str = get_tag_text(time_cell)

            items.append((intelbet_country, intelbet_tournament, intelbet_home,
                          intelbet_away, url, match_time_str))

    return items
예제 #7
0
파일: parsing.py 프로젝트: kuraga/betrobot
def handle_tournament(tournament_table):
  """Parse one tournament table into a list of raw match dicts.

  The first ``<tbody>`` holds the tournament name; the second holds the rows,
  which are dispatched on their CSS classes:

  * ``ng-hide`` rows and rows whose first cell has class ``t_comment`` are
    skipped;
  * a row whose first cell has ``colspan="13"`` carries the date for the
    matches that follow;
  * a ``th`` row supplies the column names for match rows (one is expected
    before the first match row);
  * a ``tc``/``tc1`` row starts a new match, parsed by ``handle_main_data``;
  * a ``tcd``/``tcd1`` row holds extra bets for the current match, parsed by
    ``handle_bets``.

  If ``handle_main_data`` raises, parsing stops and the matches collected so
  far are returned.

  Returns:
    List of dicts with keys ``tournament``, ``date``, ``time``, ``home``,
    ``away``, ``special_word`` and ``bets``; each match appears once.
  """
  raw_matches_data = []

  tournament_table_tbodies = tournament_table.find_all('tbody', recursive=False)
  tournament_name_tbody = tournament_table_tbodies[0]

  tournament_name = get_tag_text( tournament_name_tbody.find('tr', recursive=False).find('td', recursive=False) )

  tournament_main_tbody = tournament_table_tbodies[1]

  # Date stays None when a match row shows up before any date row.
  match_date_str = None
  # Dict of the match currently being collected, or None when there is none.
  current_match = None
  home = away = None

  trs = tournament_main_tbody.find_all('tr', recursive=False)
  for tr in trs:
    tr_classes = tr.get('class', [])
    if 'ng-hide' in tr_classes:
      continue

    first_td = tr.find('td', recursive=False)
    if first_td is None:
      # A row without cells carries nothing to parse.
      continue
    if 't_comment' in first_td.get('class', []):
      continue

    if first_td.get('colspan') == '13':
      # A date row closes the current match. Clearing it here keeps the
      # next match row from appending the same match a second time.
      if current_match is not None:
        raw_matches_data.append(current_match)
        current_match = None

      match_date_str = get_tag_text(first_td)

    if 'th' in tr_classes:
      main_data_name_tds = tr.find_all('td', recursive=False)
      main_data_names = [ get_tag_text(main_data_name_td) for main_data_name_td in main_data_name_tds ]

    if 'tc' in tr_classes or 'tc1' in tr_classes:

      if current_match is not None:
        raw_matches_data.append(current_match)
        current_match = None

      main_data_tds = tr.find_all('td', recursive=False)
      main_data = [ (main_data_names[i], get_tag_text(main_data_tds[i])) for i in range(len(main_data_names)) ]
      try:
        (match_time, home, away, special_word, _additional, main_data_bets) = handle_main_data(main_data)
      except Exception:
        return raw_matches_data

      # The date cannot change while a match is open (a date row closes it),
      # so it is safe to record it when the match starts.
      current_match = {
        'tournament': tournament_name,
        'date': match_date_str,
        'time': match_time,
        'home': home,
        'away': away,
        'special_word': special_word,
        'bets': list(main_data_bets)
      }

    elif 'tcd' in tr_classes or 'tcd1' in tr_classes:

      if current_match is None:
        # Extra bets with no match to attach them to.
        continue

      divs_and_tables = tr.find('td').find('div', class_='extTbl').find('div').contents
      # Best-effort: a malformed block must not discard the match itself.
      try:
        for element in divs_and_tables:
          if element.name == 'div':
            current_match['bets'] += handle_bets(element.contents, home=home, away=away)
          elif element.name == 'table':
            current_match['bets'] += handle_bets([ element ], home=home, away=away)
          else:
            continue
      except Exception:
        pass

    else:
      continue

  if current_match is not None:
    raw_matches_data.append(current_match)

  return raw_matches_data
예제 #8
0
def handle_tournament_day(tournament_table):
    """Parse one tournament-day table into a list of raw match dicts.

    The table is expected to contain a ``<thead>`` with the tournament name,
    a ``<tbody class="date">`` with the date, a ``<tbody class="chead">``
    whose ``th`` row names the main columns, and one ``<tbody id="line">``
    per match. In a match body the first row holds the main data (parsed by
    ``handle_main_data``) and an optional second row holds extra bets
    (parsed by ``handle_bets``) unless its cell has class ``t_comment``.

    Matches for which ``handle_main_data`` raises are skipped; failures in
    the extra bets are ignored and the match is kept with its main bets.

    Returns:
        List of dicts with keys ``tournament``, ``date``, ``time``, ``home``,
        ``away``, ``special_word`` and ``bets``.
    """
    raw_matches_data = []

    tournament_day_thead = tournament_table.find('thead', recursive=False)

    tournament_name = get_tag_text(
        tournament_day_thead.find('tr', recursive=False).find('td',
                                                              recursive=False))

    tournament_date_tbody = tournament_table.find('tbody',
                                                  class_='date',
                                                  recursive=False)
    tournament_date = get_tag_text(
        tournament_date_tbody.find('tr',
                                   recursive=False).find('td',
                                                         recursive=False))

    main_data_tds = tournament_table.find(
        'tbody', class_='chead',
        recursive=False).find('tr', class_='th',
                              recursive=False).find_all('td', recursive=False)
    main_data_names = [
        get_tag_text(main_data_td) for main_data_td in main_data_tds
    ]

    match_tbodies = tournament_table.find_all('tbody',
                                              recursive=False,
                                              id='line')
    for match_tbody in match_tbodies:
        match_trs = match_tbody.find_all('tr', recursive=False)
        if not match_trs:
            continue

        main_data_tds = match_trs[0].find_all('td', recursive=False)
        main_data = [(main_data_names[i], get_tag_text(main_data_tds[i]))
                     for i in range(len(main_data_names))]
        try:
            (match_time, home, away, special_word, _additional,
             main_data_bets) = handle_main_data(main_data)
        except Exception:
            continue

        bets = []
        bets += main_data_bets
        if len(match_trs) >= 2:
            details_td = match_trs[1].find('td', recursive=False)
            # ``get('class')`` is None for a cell without a class attribute,
            # and the row may have no <td> at all; neither case may raise.
            if details_td is not None and 't_comment' not in (
                    details_td.get('class') or []):
                elements = details_td.contents
                # Best-effort: keep the match even if extra bets fail.
                try:
                    bets += handle_bets(elements, home=home, away=away)
                except Exception:
                    pass

        # WARNING: there can be (at least) one more row, which is not parsed.

        raw_match_data = {
            'tournament': tournament_name,
            'date': tournament_date,
            'time': match_time,
            'home': home,
            'away': away,
            'special_word': special_word,
            'bets': bets
        }
        raw_matches_data.append(raw_match_data)

    return raw_matches_data