def tournament_sezon_insert_db(country_tuple): global url_base, driver url = url_base + country_tuple[1] driver.get(url) data1 = driver.find_elements_by_css_selector( ".h-text-left , .js-tablebanner-t a") sezon_list = [] tour_list = [] tour_sezon_list = [] sezon = '' for i in data1: if i.get_attribute('href'): tour_link = re.search(r'/soccer\S+', str(i.get_attribute('href'))).group()[7:] query = "SELECT id_sezon_has_tournament FROM soc2.sezon_has_tournament where sezon_has_tournament_url_exp=\'%s\';" % tour_link if not db_soc2.query_with_fetchone(query): tour = i.text query = """ SELECT id_tournament FROM soc2.tournament where tournament_name =\'%s\' and region_id_region=(SELECT id_region FROM soc2.region where region_name=\'%s\'); """ % ( tour, country_tuple[0]) if not db_soc2.query_with_fetchone(query) and not ( (tour, country_tuple[0]) in tour_list): tour_list.append((tour, country_tuple[0])) tour_sezon_tuple = (sezon, tour, country_tuple[0], tour_link) tour_sezon_list.append(tour_sezon_tuple) else: sezon = i.text query = "SELECT id_sezon FROM soc2.sezon where sezon_name=\'%s\';" % sezon if not db_soc2.query_with_fetchone(query): sezon_list.append((sezon, )) # query_toyr_seson(sezon_list, tour_list, tour_sezon_list) write_csv(tour_sezon_list, 'sezon_tour_list.csv')
def tournament_sezon_insert_db(file_name='sezon_tour_list.csv'): """ функция добавления турниров по сезонам из файла""" flag_region = "" query3 = """INSERT INTO soc2.sezon_has_tournament (sezon_id_sezon,tournament_id_tournament,sezon_has_tournament_url_exp) value ((SELECT id_sezon FROM sezon WHERE sezon_name=%s), (SELECT id_tournament FROM tournament WHERE tournament_name=%s and region_id_region = (SELECT id_region FROM region WHERE region_name=%s)), %s);""" with open(file_name, 'r') as f: for r in f: r = db_soc2.not_unicod(r) tour = r.rstrip().split(';') if flag_region != tour[2]: flag_region = tour[2] print(flag_region) query = """ SELECT id_sezon_has_tournament FROM soc2.sezon_has_tournament where sezon_has_tournament_url_exp=\"%s\"; """ % tour[ 3] if not db_soc2.query_with_fetchone(query): db_soc2.insert_one_row_db(query3, tuple(tour)) print(tour)
def sezon_insert_db(file_name='sezon_tour_list.csv'): """ функция добавления сезона из файла""" query1 = """INSERT INTO soc2.sezon (sezon_name) VALUES (%s)""" with open(file_name, 'r') as f: for r in f: r = db_soc2.not_unicod(r) sezon = str(r.rstrip().split(';')[0]) query = "SELECT id_sezon FROM soc2.sezon where sezon_name=\'%s\';" % sezon if not db_soc2.query_with_fetchone(query): db_soc2.insert_one_row_db(query1, (sezon,)) print(sezon)
def tour_insert_db(file_name='sezon_tour_list.csv'): """ функция добавления турниров из файла """ flag_region = "" query2 = """INSERT INTO soc2.tournament(tournament_name, region_id_region) VALUES(%s,(SELECT id_region FROM soc2.region WHERE region_name=%s));""" with open(file_name, 'r') as f: for r in f: r = db_soc2.not_unicod(r) tour = r.rstrip().split(';') if flag_region != tour[2]: flag_region = tour[2] print(flag_region) query = """ SELECT id_tournament FROM soc2.tournament where tournament_name =\'%s\' and region_id_region=(SELECT id_region FROM soc2.region where region_name=\'%s\'); """ % (tour[1], tour[2]) # добавление турниров if not db_soc2.query_with_fetchone(query): db_soc2.insert_one_row_db(query2, (tour[1], tour[2])) print((tour[1], tour[2]))
def _match_db(file_name, country): # запрос на выборку матчей по ссылке query_1_5 = "SELECT id_match FROM soc2.match WHERE match_url_exp=" # запрос на добавление матча query_2_5 = """INSERT INTO soc2.match (match_home, match_away, match_tour, match_round,match_datatime, match_url_exp, match_k_av_1, match_k_av_x, match_k_av_2, match_k_18_1, match_k_18_x, match_k_18_2, match_k_44_1, match_k_44_x, match_k_44_2, m_k_av_o_05, m_k_av_u_05, m_k_av_o_15, m_k_av_u_15, m_k_av_o_25, m_k_av_u_25, m_k_av_o_35, m_k_av_u_35, m_k_18_o_05, m_k_18_u_05, m_k_18_o_15, m_k_18_u_15, m_k_18_o_25, m_k_18_u_25, m_k_18_o_35, m_k_18_u_35, m_k_44_o_05, m_k_44_u_05, m_k_44_o_15, m_k_44_u_15, m_k_44_o_25, m_k_44_u_25, m_k_44_o_35, m_k_44_u_35, match_status) value ((SELECT id_team FROM team WHERE team_name_exp=%s), (SELECT id_team FROM team WHERE team_name_exp=%s), (SELECT id_sezon_has_tournament FROM sezon_has_tournament WHERE sezon_has_tournament_url_exp LIKE %s) ,(SELECT id_round FROM round WHERE round_name=%s),%s,%s, %s,%s,%s,%s,%s,%s,%s,%s,%s, %s,%s,%s,%s,%s,%s,%s,%s, %s,%s,%s,%s,%s,%s,%s,%s, %s,%s,%s,%s,%s,%s,%s,%s, (SELECT id_status FROM status WHERE status_name=%s));""" match_list = [] tuple_match = [] href_match = [] print('--------------------------------------------------------') print('Обрабатывается файл: matchs/%s/%s' % (country, file_name)) with open('matchs/%s/%s' % (country, file_name)) as f: array = [row.strip().split(';') for row in f] # если разделение идёт по знакам табуляции if len(array[0]) == 1: array0 = [row[0].strip().split('\t') for row in array] array = array0 array0.clear # если нет прямой ссылки на матч извлекаем из файла web_match_ if not array[1][6].startswith('https'): href_match = _read_link(file_name, country) # если необходимо вносим игроков,команды,раунды и комменты dop_match_add(file_name, country) for r in array: # избавляемся от мусорных строк if len(r) < 3: continue # определяем ссылку на матч if len(href_match) > 1: link = href_match[r[0] + r[2].split('/')[-3] + '-' + r[4].split('/')[-3]] else: if len(r[6].split('/')) > 4: link = re.search(r'/[^/]*/[^/]*/[^/]*/[^/]*/$', r[6]).group() else: link = '/%s/%s%s' % (country, file_name[:-4], r[6]) # проверка по ссылке наличия дубликатов-матчей в базе if query_with_fetchone(query_1_5 + "\'" + link + "\'") != None: continue if any("Awarded" in s for s in r): status = 'Awarded' elif any("Canceled" in s for s in r): status = 'Canceled' else: status = 'finished' # проверка по ссылке наличия дубликатов-матчей в обрабатываемом файле flag = False for l in match_list: if l[5] == link: flag = True break if flag: continue # 0-1: хозяева(1) и гости (2) tuple_match.append(r[1]) tuple_match.append(r[3]) # 2 - название файла для определения турнира+сезона tuple_match.append('%/'+country+'/' + file_name[:-4]+'/%') # 3 раунд tuple_match.append(r[0]) # 4 дата и время a = 5 if len(r[a]) < 10: d = '01.01.1901 - 01:00' elif len(r[a]) < 17 and r[a+1][:4] != 'https': d = r[a] + r[a+1] else: d = r[a] datetime_object = datetime.strptime(d, '%d.%m.%Y - %H:%M') tuple_match.append(datetime_object) # 5 - ссылка на матч tuple_match.append(link) # добавляем кефы while (not('av1x2' == r[a])): a += 1 for d in ['av1x2', '181x2', '441x2']: if len(r) > a and r[a] == d: for i in range(3): try: tuple_match.append(float(r[i + a + 1])) except ValueError: tuple_match.append(None) except IndexError: tuple_match.append(None) a += 4 else: for i in range(3): tuple_match.append(None) for d in ['avou0.5', '18ou0.5', '44ou0.5', 'avou1.5', '18ou1.5', '44ou1.5', 'avou2.5', '18ou2.5', '44ou2.5', 'avou3.5', '18ou3.5', '44ou3.5']: if a < len(r) and r[a] == d: for i in range(2): try: tuple_match.append(float(r[i + a + 1])) except ValueError: tuple_match.append(None) a += 3 else: for i in range(2): tuple_match.append(None) tuple_match.append(status) try: insert_one_row_db(query_2_5, tuple(tuple_match)) except: print('Error: s', tuple_match) # match_list.append(tuple(tuple_match)) if len(tuple_match) != 40: print(len(tuple_match)) print(tuple_match) tuple_match.clear() # print(match_list) # insert_db(query_2_5, match_list) # добавляем голы gol_add(file_name, country, False)