import json
import logging
from datetime import date, datetime

import requests
from bs4 import BeautifulSoup

# bd is the project's database helper module (not shown in this fragment).
logger = logging.getLogger(__name__)


def store_new_matches():
    logger.info('Start scraping and storing new matches')
    today = date.today()
    today_matches = get_matches(today)
    today_json_matches = [json.loads(MatchJsonEncoder().encode(m))
                          for m in today_matches]
    bd.insert_new_matches(today_json_matches)
    logger.info('Finish scraping and storing new matches')
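# Illustrative sketch (assumption, not part of the original fragment): store_new_matches
# round-trips each Match through json.loads(MatchJsonEncoder().encode(m)), which implies
# MatchJsonEncoder is a json.JSONEncoder subclass that turns a Match into a plain dict
# ready for insertion. The dict keys and attribute names below are illustrative only.
class MatchJsonEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Match):
            return {
                'home': obj.home_team,
                'away': obj.away_team,
                'mults': obj.mults,
                'competition': str(obj.competition),
                'date': obj.date.isoformat(),
            }
        # Fall back to the default behaviour (raises TypeError for unknown types).
        return super().default(obj)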
def get_matches(day):
    """Scrape every match listed for the given day, paging until no competitions remain."""
    matches_obj = []
    page = 1
    finished = False
    # url is the bookmaker's match-listing endpoint (not shown in this fragment).
    post_data = {'sportId': '4', 'page': str(page), 'dateFilter': str(day)}
    while not finished:
        logger.info('Scraping page %d', page)
        request = requests.post(url, post_data)
        soup = BeautifulSoup(request.text, 'html.parser')
        competitions = soup.find_all(class_='event-group-level1')
        logger.info('%d competitions found', len(competitions))
        for competition in competitions:
            comp_obj = _get_competition(competition)
            logger.info('Scraping competition: %s', comp_obj)
            match_listing = competition.parent.findNext('ul')
            matches = match_listing.find_all(class_='col3 three-way')
            logger.info('%d matches found', len(matches))
            for match in matches:
                teams, mults, match_time = _get_match_data(match)
                match_obj = Match(teams[0], teams[1], mults, comp_obj,
                                  datetime.combine(day, match_time))
                matches_obj.append(match_obj)
        if len(competitions) == 0:
            finished = True
        page += 1
        post_data['page'] = page
    logger.info('Stop scraping matches on: %s. Matches found: %d', str(day), len(matches_obj))
    return matches_obj
if __name__ == "__main__":
    today = date.today()
    today_matches = get_matches(today)
    today_json_matches = [json.loads(MatchJsonEncoder().encode(m))
                          for m in today_matches]
    bd.insert_new_matches(today_json_matches)