コード例 #1
0
def tournament_sezon_insert_db(country_tuple):
    global url_base, driver

    url = url_base + country_tuple[1]
    driver.get(url)

    data1 = driver.find_elements_by_css_selector(
        ".h-text-left , .js-tablebanner-t a")
    sezon_list = []
    tour_list = []
    tour_sezon_list = []
    sezon = ''
    for i in data1:
        if i.get_attribute('href'):
            tour_link = re.search(r'/soccer\S+',
                                  str(i.get_attribute('href'))).group()[7:]
            query = "SELECT id_sezon_has_tournament FROM soc2.sezon_has_tournament where sezon_has_tournament_url_exp=\'%s\';" % tour_link
            if not db_soc2.query_with_fetchone(query):
                tour = i.text
                query = """ SELECT id_tournament FROM soc2.tournament where tournament_name =\'%s\' and
                    region_id_region=(SELECT id_region FROM soc2.region where region_name=\'%s\'); """ % (
                    tour, country_tuple[0])
                if not db_soc2.query_with_fetchone(query) and not (
                    (tour, country_tuple[0]) in tour_list):
                    tour_list.append((tour, country_tuple[0]))
                tour_sezon_tuple = (sezon, tour, country_tuple[0], tour_link)
                tour_sezon_list.append(tour_sezon_tuple)
        else:
            sezon = i.text
            query = "SELECT id_sezon FROM soc2.sezon where sezon_name=\'%s\';" % sezon
            if not db_soc2.query_with_fetchone(query):
                sezon_list.append((sezon, ))
    # query_toyr_seson(sezon_list, tour_list, tour_sezon_list)
    write_csv(tour_sezon_list, 'sezon_tour_list.csv')
コード例 #2
0
def tournament_sezon_insert_db(file_name='sezon_tour_list.csv'):
    """ функция добавления турниров по сезонам из файла"""

    flag_region = ""
    query3 = """INSERT INTO soc2.sezon_has_tournament
        (sezon_id_sezon,tournament_id_tournament,sezon_has_tournament_url_exp)
        value ((SELECT id_sezon FROM sezon WHERE sezon_name=%s),
        (SELECT id_tournament FROM tournament
        WHERE tournament_name=%s and region_id_region =
        (SELECT id_region FROM region WHERE region_name=%s)),
        %s);"""

    with open(file_name, 'r') as f:
        for r in f:
            r = db_soc2.not_unicod(r)
            tour = r.rstrip().split(';')
            if flag_region != tour[2]:
                flag_region = tour[2]
                print(flag_region)

            query = """ SELECT id_sezon_has_tournament FROM soc2.sezon_has_tournament where sezon_has_tournament_url_exp=\"%s\"; """ % tour[
                3]
            if not db_soc2.query_with_fetchone(query):
                db_soc2.insert_one_row_db(query3, tuple(tour))
                print(tour)
コード例 #3
0
def sezon_insert_db(file_name='sezon_tour_list.csv'):
    """ функция добавления сезона из файла"""

    query1 = """INSERT INTO soc2.sezon (sezon_name) VALUES (%s)"""

    with open(file_name, 'r') as f:
        for r in f:
            r = db_soc2.not_unicod(r)
            sezon = str(r.rstrip().split(';')[0])
            query = "SELECT id_sezon FROM soc2.sezon where sezon_name=\'%s\';" % sezon
            if not db_soc2.query_with_fetchone(query):
                db_soc2.insert_one_row_db(query1, (sezon,))
                print(sezon)
コード例 #4
0
def tour_insert_db(file_name='sezon_tour_list.csv'):
    """ функция добавления турниров из файла """

    flag_region = ""
    query2 = """INSERT INTO soc2.tournament(tournament_name, region_id_region)
        VALUES(%s,(SELECT id_region FROM soc2.region WHERE region_name=%s));"""

    with open(file_name, 'r') as f:
        for r in f:
            r = db_soc2.not_unicod(r)
            tour = r.rstrip().split(';')
            if flag_region != tour[2]:
                flag_region = tour[2]
                print(flag_region)
            query = """ SELECT id_tournament FROM soc2.tournament where tournament_name =\'%s\' and
                region_id_region=(SELECT id_region FROM soc2.region where region_name=\'%s\'); """ % (tour[1], tour[2])
            # добавление турниров
            if not db_soc2.query_with_fetchone(query):
                db_soc2.insert_one_row_db(query2, (tour[1], tour[2]))
                print((tour[1], tour[2]))
コード例 #5
0
def _match_db(file_name, country):
    # запрос на выборку матчей по ссылке
    query_1_5 = "SELECT id_match FROM soc2.match WHERE match_url_exp="
    # запрос на добавление матча
    query_2_5 = """INSERT INTO soc2.match
        (match_home, match_away, match_tour, match_round,match_datatime,
        match_url_exp, match_k_av_1, match_k_av_x, match_k_av_2,
        match_k_18_1, match_k_18_x, match_k_18_2, match_k_44_1,
        match_k_44_x, match_k_44_2, m_k_av_o_05, m_k_av_u_05, m_k_av_o_15,
        m_k_av_u_15, m_k_av_o_25, m_k_av_u_25, m_k_av_o_35, m_k_av_u_35,
        m_k_18_o_05, m_k_18_u_05, m_k_18_o_15, m_k_18_u_15, m_k_18_o_25,
        m_k_18_u_25, m_k_18_o_35, m_k_18_u_35, m_k_44_o_05, m_k_44_u_05,
        m_k_44_o_15, m_k_44_u_15, m_k_44_o_25, m_k_44_u_25, m_k_44_o_35, m_k_44_u_35,
        match_status)
        value ((SELECT id_team FROM team WHERE team_name_exp=%s),
        (SELECT id_team FROM team WHERE team_name_exp=%s),
        (SELECT id_sezon_has_tournament FROM sezon_has_tournament
        WHERE sezon_has_tournament_url_exp LIKE %s)
        ,(SELECT id_round FROM round WHERE round_name=%s),%s,%s,
        %s,%s,%s,%s,%s,%s,%s,%s,%s,
        %s,%s,%s,%s,%s,%s,%s,%s,
        %s,%s,%s,%s,%s,%s,%s,%s,
        %s,%s,%s,%s,%s,%s,%s,%s,
        (SELECT id_status FROM status WHERE status_name=%s));"""
    match_list = []
    tuple_match = []
    href_match = []

    print('--------------------------------------------------------')
    print('Обрабатывается файл: matchs/%s/%s' % (country, file_name))

    with open('matchs/%s/%s' % (country, file_name)) as f:
        array = [row.strip().split(';') for row in f]

    # если разделение идёт по знакам табуляции
    if len(array[0]) == 1:
        array0 = [row[0].strip().split('\t') for row in array]
        array = array0
        array0.clear

    # если нет прямой ссылки на матч извлекаем из файла web_match_
    if not array[1][6].startswith('https'):
        href_match = _read_link(file_name, country)

    # если необходимо вносим игроков,команды,раунды и комменты
    dop_match_add(file_name, country)

    for r in array:
        # избавляемся от мусорных строк
        if len(r) < 3:
            continue
        # определяем ссылку на матч
        if len(href_match) > 1:
            link = href_match[r[0] +
                              r[2].split('/')[-3] + '-' + r[4].split('/')[-3]]
        else:
            if len(r[6].split('/')) > 4:
                link = re.search(r'/[^/]*/[^/]*/[^/]*/[^/]*/$', r[6]).group()
            else:
                link = '/%s/%s%s' % (country, file_name[:-4], r[6])

        # проверка по ссылке наличия дубликатов-матчей в базе
        if query_with_fetchone(query_1_5 + "\'" + link + "\'") != None:
            continue
        if any("Awarded" in s for s in r):
            status = 'Awarded'
        elif any("Canceled" in s for s in r):
            status = 'Canceled'
        else:
            status = 'finished'
        # проверка по ссылке наличия дубликатов-матчей в обрабатываемом файле
        flag = False
        for l in match_list:
            if l[5] == link:
                flag = True
                break
        if flag:
            continue

        # 0-1: хозяева(1) и гости (2)
        tuple_match.append(r[1])
        tuple_match.append(r[3])
        # 2 - название файла для определения турнира+сезона
        tuple_match.append('%/'+country+'/' + file_name[:-4]+'/%')
        # 3 раунд
        tuple_match.append(r[0])
        # 4 дата и время
        a = 5
        if len(r[a]) < 10:
            d = '01.01.1901 - 01:00'
        elif len(r[a]) < 17 and r[a+1][:4] != 'https':
            d = r[a] + r[a+1]
        else:
            d = r[a]
        datetime_object = datetime.strptime(d, '%d.%m.%Y - %H:%M')
        tuple_match.append(datetime_object)

        # 5 - ссылка на матч
        tuple_match.append(link)

        # добавляем кефы
        while (not('av1x2' == r[a])):
            a += 1
        for d in ['av1x2', '181x2', '441x2']:
            if len(r) > a and r[a] == d:
                for i in range(3):
                    try:
                        tuple_match.append(float(r[i + a + 1]))
                    except ValueError:
                        tuple_match.append(None)
                    except IndexError:
                        tuple_match.append(None)
                a += 4
            else:
                for i in range(3):
                    tuple_match.append(None)

        for d in ['avou0.5', '18ou0.5', '44ou0.5', 'avou1.5', '18ou1.5',
                  '44ou1.5', 'avou2.5', '18ou2.5', '44ou2.5', 'avou3.5', '18ou3.5', '44ou3.5']:
            if a < len(r) and r[a] == d:
                for i in range(2):
                    try:
                        tuple_match.append(float(r[i + a + 1]))
                    except ValueError:
                        tuple_match.append(None)
                a += 3
            else:
                for i in range(2):
                    tuple_match.append(None)
        tuple_match.append(status)

        try:
            insert_one_row_db(query_2_5, tuple(tuple_match))
        except:
            print('Error: s', tuple_match)
        # match_list.append(tuple(tuple_match))
        if len(tuple_match) != 40:
            print(len(tuple_match))
            print(tuple_match)

        tuple_match.clear()

    # print(match_list)
    # insert_db(query_2_5, match_list)

    # добавляем голы
    gol_add(file_name, country, False)