def teams_competing_this_week():
    # Create a list of team names
    teams = []

    # Get all weeks and their dates for the season
    scraper = Scraper()
    weeks = scraper.get_year_weeks(ScraperConstants.Men, datetime.datetime.now().year)

    # Get the date to scrape for this week
    # Avoids an IndexError once in the post season
    try:
        date = [week for week in weeks if int(week['current']) == 1][0]['date']
    except IndexError:
        return teams

    # Get the schedule for this week
    schedule = scraper.get_schedule(ScraperConstants.Men, date)

    # For each day in the schedule with a meet on it
    for day in schedule:
        # For each meet on that day
        for meet in schedule[day]['meets']:
            # Collect team names from both sides; home_teams is None for virtual meets
            if meet['away_teams'] is not None:
                for team in meet['away_teams'].split(", "):
                    teams.append(team)
            if meet['home_teams'] is not None:
                for team in meet['home_teams'].split(", "):
                    teams.append(team)

    return teams
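# Hypothetical usage of the helper above (team names are whatever strings RTN
# reports, so any membership check must match those exactly):
#
#   competing = teams_competing_this_week()
#   print("%d teams have a meet this week" % len(competing))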
def getCoursesCards(soup, isSetCategories=False):
    selector = 'div.row.course-list.list > div'
    cards = soup.select(selector)
    data = []
    for card in cards:
        data.append(extractCourseCard(card))

    if isSetCategories:
        s = Scraper()
        categoriesUrls = getCategoriesURLs(soup)
        for category in categoriesUrls:
            name = category.get('name', '')
            url = category.get('url')
            s.get(url)
            tempSoup = s.html_soup()
            categoryCards = getCoursesCards(tempSoup)
            ids = [c['publicId'] for c in categoryCards]

            # Tag every course that appears on this category page with the category name
            for course in data:
                if course['publicId'] in ids:
                    if course.get('categories'):
                        course['categories'].append(name)
                    else:
                        course['categories'] = [name]
    return data
def handle(self, *args, **options):
    scraper = Scraper()
    start_time = round(time.time() * 1000)

    print("Getting list of all teams for %s" % options['year'])

    # Try to get the list of all teams
    try:
        teams = scraper.get_teams(ScraperConstants.Men, options['year'])
        print("Found %s teams" % len(teams))
    except Exception:
        traceback.print_exc()
        return

    # For each team
    new_gymnasts = []
    num_skipped = 0
    for team in teams:
        # Get the roster for the team
        try:
            print("Getting roster for %s" % team['team_name'])
            roster = scraper.get_roster(ScraperConstants.Men, options['year'], team['id'])
        except Exception:
            traceback.print_exc()
            return

        # For each gymnast on the roster, make a new Gymnast object to save to the database
        for gymnast in roster:
            name = "%s %s" % (gymnast['fname'].strip(), gymnast['lname'].strip())
            g = Gymnast(name=name, rtn_id=gymnast['id'], team=team['team_name'],
                        year=GYMNAST_YEARS[gymnast['school_year']])

            # Check if the gymnast already exists in the database to avoid re-adding
            if Gymnast.objects.filter(rtn_id=gymnast['id']).exists():
                num_skipped = num_skipped + 1
            else:
                new_gymnasts.append(g)

    # Save new gymnasts to the database, with a zeroed Average for each event
    for gymnast in new_gymnasts:
        gymnast.save()
        events = ['FX', 'PH', 'SR', 'VT', 'PB', 'HB']
        for event in events:
            Average.objects.create(gymnast=gymnast, score=decimal.Decimal(0.00),
                                   event=event, number_of_scores=0)

    print("")
    print("------ RESULTS ------")
    print("Added %s gymnasts" % len(new_gymnasts))
    print("Skipped %s existing gymnasts" % num_skipped)
    print("Took %s ms" % (round(time.time() * 1000) - start_time))
def weekly_news(request):
    context = {}
    scraper = Scraper()
    context['current_week'] = int(
        scraper.get_current_and_max_week(ScraperConstants.Men,
                                         datetime.datetime.now().year)['week'])
    context['posts'] = Post.objects.filter(
        status=1, week=context['current_week']).order_by('-posted_at')
    context['lineup'] = 0
    context['platform'] = 1
    template_name = 'news/weekly_news.html'
    return render(request, template_name, context)
def remove_gymnast_from_roster(request, team_pk, gymnast_pk):
    scraper = Scraper()
    current_week = int(scraper.get_current_and_max_week(ScraperConstants.Men,
                                                        datetime.datetime.now().year)['week'])
    gymnast = get_object_or_404(Gymnast, pk=gymnast_pk)
    team = get_object_or_404(FantasyTeam, pk=team_pk)

    # Remove the gymnast from the roster, from any current-week lineups,
    # and from the league's drafted pool
    team.roster.remove(gymnast)
    league = team.league
    lineups = LineUp.objects.filter(team=team, week=current_week)
    for lineup in lineups:
        lineup.gymnasts.remove(gymnast)
    league.drafted.remove(gymnast)
    return redirect('view_team', pk=team_pk)
def test_description_dictionary():
    dict_ = {
        "Type": "Residential",
        "Style": "2 Storey",
        "Lot Size": "0.115 Ac",
        "MLS Number": "PW20120310",
        "Year Built": "2012",
        "Parking info": "2, Attached",
        "Zip": "92886",
        "School District": "Placentia-Yorba Linda Unified School District",
    }
    scraper = Scraper()
    assert scraper.description_dictionary(soup) == dict_
def get_context_data(self, **kwargs):
    scraper = Scraper()
    context = super().get_context_data(**kwargs)

    # Orient the matchup so the requesting user's team is always team1
    if context['object'].team2.user == self.request.user:
        context['team1'] = context['object'].team2
        context['team2'] = context['object'].team1
    else:
        context['team1'] = context['object'].team1
        context['team2'] = context['object'].team2

    # Every gymnast in either team's lineups for this week, plus both rosters
    gymnasts = (Gymnast.objects.filter(
        LineUp__in=(LineUp.objects.filter(team=context['team1'], week=context['object'].week).all()
                    | LineUp.objects.filter(team=context['team2'], week=context['object'].week).all()))
        | Gymnast.objects.filter(
            id__in=(context['team1'].roster.all() | context['team2'].roster.all()))).distinct()

    current_week = int(scraper.get_current_and_max_week(ScraperConstants.Men,
                                                        datetime.datetime.now().year)['week'])
    context['current_week'] = current_week
    context['teams_competing'] = teams_competing_this_week()
    context['meet_started'] = {}

    # Could this be optimized?
    weeks = scraper.get_year_weeks(ScraperConstants.Men, datetime.datetime.now().year)
    date = [week for week in weeks if int(week['wk']) == int(context['object'].week)][0]['date']
    schedule = scraper.get_schedule(ScraperConstants.Men, date)

    # Loop through every meet day this week
    for day in schedule:
        # Loop through every meet on that day
        for meet in schedule[day]['meets']:
            # Loop through gymnasts (could this be optimized?)
            for gymnast in gymnasts:
                # Check whether the gymnast's team is in this meet (could this be optimized?)
                if gymnast.team in str(meet['home_teams']) or gymnast.team in str(meet['away_teams']):
                    # Only record the gymnast's first meet of the week
                    if gymnast.name not in context['meet_started']:
                        # Meet start datetime
                        meet_datetime = datetime.datetime.strptime(
                            str(meet['d']) + " " + str(meet['time']), "%Y-%m-%d %H:%M:%S")
                        # Current datetime (Eastern, because that's what RTN uses)
                        now = datetime.datetime.now(timezone('US/Eastern'))
                        if now.date() > meet_datetime.date():
                            context['meet_started'][gymnast.name] = True
                        elif now.date() == meet_datetime.date():
                            if meet_datetime.time() != datetime.time(0, 0, 0):
                                # Meet is today with a known start time
                                context['meet_started'][gymnast.name] = now.time() > meet_datetime.time()
                            else:
                                # A midnight start time appears to mean "time unknown"; use noon as the cutoff
                                context['meet_started'][gymnast.name] = now.time() >= datetime.time(12, 0, 0)
                        else:
                            context['meet_started'][gymnast.name] = False
    return context
def get_context_data(self, **kwargs):
    context = super().get_context_data(**kwargs)
    context['teams'] = FantasyTeam.objects.filter(
        league_id=context['object'].id).order_by('-wins', 'name')

    scraper = Scraper()
    # Scrape the week info once and reuse it for both values
    week_info = scraper.get_current_and_max_week(ScraperConstants.Men,
                                                 datetime.datetime.now().year)
    context['current_week'] = int(week_info['week'])
    context['max_week'] = int(week_info['max'])

    context['matchups'] = Matchup.objects.filter(team1__in=context['teams'])
    return context
def extraBlogData(url):
    s = Scraper()
    s.get(url)
    soup = s.html_soup()

    data = {
        'publicId': url.split('/')[4],
    }
    thumbnailElm = soup.select_one('.post-content img')
    data.update({
        'thumbnail': thumbnailElm['src'] if thumbnailElm else None,
    })
    return data
def test_demographics_dictionary():
    dict_ = {
        "Total population": "50,545",
        "Male population": "24,484",
        "Female population": "26,061",
        "Median age": "42.80",
        "Total households": "16,559",
        "Average people per household": "3.03",
        "Total housing units": "17,062",
        "Owner occupied": "13,469",
        "Renter occupied": "3,090",
        "Median year built": "1979",
        "Median household income": "123,737",
        "Average household income": "154,190",
    }
    scraper = Scraper()
    assert scraper.demographics_dictionary(soup) == dict_
def getTheGoodZoneDataAndMyPathes():
    s = Scraper()
    coursesUrl = f'{protocol}://{domain}/courses'
    s.get(coursesUrl)
    coursesSoup = s.html_soup()

    blogsUrl = f'{protocol}://{domain}/blog'
    s.get(blogsUrl)
    blogsSoup = s.html_soup()

    # getCoaches() scrapes every card's page, so call it once and reuse the result
    coaching = getCoaches(coursesSoup)

    return [
        {
            'delPath': '/delete/courses/',
            'setPath': '/set/course/',
            'items': getCourses()
        },
        {
            'delPath': '/delete/coaches/',
            'setPath': '/set/coach/',
            'items': coaching['coach']
        },
        {
            'delPath': '/delete/instructors/',
            'setPath': '/set/instructor/',
            'items': coaching['course']
        },
        {
            'delPath': '/delete/live-events/',
            'setPath': '/set/live-event/',
            'items': coaching['live event']
        },
        {
            'setPath': '/set-course-description/',
            'items': coaching['description']
        },
        {
            'delPath': '/delete/blogs/',
            'setPath': '/set/blog/',
            'items': getBlogs(blogsSoup)
        },
    ]
def getCoachingCards(soup):
    selector = 'div.row.services-row.list > div'
    cards = soup.select(selector)
    data = {
        'total': [],
        'coach': [],
        'live event': [],
        'course': [],
        'description': []
    }
    for card in cards:
        data['total'].append(extractCoachingCard(card))

    s = Scraper()
    for cardData in data['total']:
        url = cardData.get('url')
        s.get(url)
        tempSoup = s.html_soup()
        descriptionContentELm = tempSoup.select_one('.course-block.custom_html')

        # Skip cards whose description block is missing or mentions none of the known types
        types = ['description', 'course', 'coach', 'live event']
        if not (descriptionContentELm and any(t in descriptionContentELm.text for t in types)):
            continue

        descriptionContent = descriptionContentELm.text
        cardData.update(parseCoaching(descriptionContent))
        card = handleCoachingCardData(cardData)
        data[card.pop('type')].append(card)

    data.pop('total')
    return data
def getCourses():
    allCourses = []
    s = Scraper()
    pageNum = 1
    # Page through /courses until a page returns no course cards
    while True:
        coursesUrl = f'{protocol}://{domain}/courses?page={pageNum}'
        s.get(coursesUrl)
        coursesSoup = s.html_soup()
        courses = getCoursesCards(coursesSoup, isSetCategories=True)
        if not courses:
            break
        allCourses.extend(courses)
        pageNum += 1
    return allCourses
def test_page_type():
    scraper = Scraper()
    assert isinstance(soup, BeautifulSoup)
def test_get_price():
    scraper = Scraper()
    assert scraper.get_price(soup) == 998888
def test_get_bedrooms():
    scraper = Scraper()
    assert scraper.get_bedrooms(soup) == 4
def create_team(user, league):
    # The Scraper instance the original created here was never used; return the
    # created team instead of discarding it
    team = FantasyTeam.objects.create(user=user, league=league,
                                      name=str(user.username) + "'s Team")
    return team
def get_context_data(self, **kwargs):
    scraper = Scraper()
    context = super().get_context_data(**kwargs)
    context["roster"] = context["object"].roster.all()
    context['current_week'] = int(
        scraper.get_current_and_max_week(ScraperConstants.Men,
                                         datetime.datetime.now().year)['week'])
    context["lineups"] = LineUp.objects.filter(
        team=context['object'], week=context['current_week']).order_by('pk')
    context['teams_competing'] = teams_competing_this_week()
    context['meet_started'] = {}

    # Could this be optimized?
    weeks = scraper.get_year_weeks(ScraperConstants.Men, datetime.datetime.now().year)

    # Avoids an IndexError once in the post season
    try:
        date = [week for week in weeks if int(week['wk']) == context['current_week']][0]['date']
    except IndexError:
        return context

    schedule = scraper.get_schedule(ScraperConstants.Men, date)
    gymnasts = context["roster"]

    # Loop through every meet day this week
    for day in schedule:
        # Loop through every meet on that day
        for meet in schedule[day]['meets']:
            # Loop through gymnasts (could this be optimized?)
            for gymnast in gymnasts:
                # Check whether the gymnast's team is in this meet (could this be optimized?)
                if gymnast.team in str(meet['home_teams']) or gymnast.team in str(meet['away_teams']):
                    # Only record the gymnast's first meet of the week
                    if gymnast.name not in context['meet_started']:
                        # Meet start datetime
                        meet_datetime = datetime.datetime.strptime(
                            str(meet['d']) + " " + str(meet['time']), "%Y-%m-%d %H:%M:%S")
                        # Current datetime (Eastern, because that's what RTN uses)
                        now = datetime.datetime.now(timezone('US/Eastern'))
                        if now.date() > meet_datetime.date():
                            context['meet_started'][gymnast.name] = True
                        elif now.date() == meet_datetime.date():
                            if meet_datetime.time() != datetime.time(0, 0, 0):
                                # Meet is today with a known start time
                                context['meet_started'][gymnast.name] = now.time() > meet_datetime.time()
                            else:
                                # A midnight start time appears to mean "time unknown"; use noon as the cutoff
                                context['meet_started'][gymnast.name] = now.time() >= datetime.time(12, 0, 0)
                        else:
                            context['meet_started'][gymnast.name] = False
    return context
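# A possible refactor (a sketch, not part of the original code): the "has this meet
# started?" decision above appears in two views and could be pulled into one helper.
# It assumes the same RTN conventions as the views: naive Eastern meet datetimes, and
# a midnight start time meaning the real start time is unknown.
def meet_has_started(meet_datetime, now):
    """Return True if a meet starting at meet_datetime has begun as of `now` (both Eastern)."""
    if now.date() > meet_datetime.date():
        return True
    if now.date() < meet_datetime.date():
        return False
    if meet_datetime.time() != datetime.time(0, 0, 0):
        # Known start time: compare clock times directly
        return now.time() > meet_datetime.time()
    # Midnight appears to mean "time unknown"; the views above use noon as the cutoff
    return now.time() >= datetime.time(12, 0, 0)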
# Load authenticated session from file to prevent unnecessary logins:
from scraper.DBConnection import DBConnection
from scraper.Scraper import Scraper
import argparse

parser = argparse.ArgumentParser(description='Fetch questions with solutions from Quizduell Germany')
parser.add_argument('username', type=str, help='The username of the quizduell account')
parser.add_argument('password', type=str, help='The password of the quizduell account')
parser.add_argument('--db', metavar='--db', type=str, help='Path to the sqlite database',
                    dest='db_file', default='Quizduell.sqlite', required=False)
args = parser.parse_args()

scraper = Scraper(args.username, args.password)
connection = DBConnection(args.db_file)

# Fetch games forever; at the start of every batch of 10, give up all open games,
# commit, and print how many questions have been collected so far
while True:
    for i in range(0, 10):
        if i == 0:
            scraper.give_up_all()
            connection.commit()
            print(connection.count_questions())
        game = scraper.fetch_game()
        connection.add_question(game)
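# A hypothetical invocation (the filename fetch_questions.py is assumed, not from
# the source); the script loops forever, so stop it with Ctrl-C:
#
#   python fetch_questions.py myuser mypass --db Quizduell.sqlite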
def handle(self, *args, **options):
    scraper = Scraper()
    start_time = round(time.time() * 1000)

    print("Getting all matchups for week %s" % options['week'])

    try:
        # Get all weeks and their dates for the season
        weeks = scraper.get_year_weeks(ScraperConstants.Men, options['year'])
        # Get the date to scrape for the specified week
        date = [week for week in weeks if int(week['wk']) == options['week']][0]['date']
    except Exception:
        traceback.print_exc()
        return

    try:
        # Get the schedule for the week
        schedule = scraper.get_schedule(ScraperConstants.Men, date)
    except Exception:
        traceback.print_exc()
        return

    # Create a list of (meet id, day of meet, meet name)
    meets = []

    # For each day in the schedule with a meet on it
    for day in schedule:
        # For each meet on that day
        for meet in schedule[day]['meets']:
            # Create a name for the meet depending on home vs. away teams or virtual
            away_teams = meet['away_teams']
            home_teams = meet['home_teams']
            if home_teams is None:
                meet_name = "%s (Virtual)" % away_teams
            else:
                meet_name = "%s @ %s" % (away_teams, home_teams)

            # Add to the list of (meet id, day of meet, meet name)
            meets.append((meet['meet_id'], day, meet_name))

    # Keep track of new scores added and the number skipped
    scores = []
    num_skipped = 0

    # Go through the list of (meet id, day of meet, meet name)
    for meet_id, day, meet_name in meets:
        print("Getting meet results for %s" % meet_name)

        # Get the meet's results
        try:
            meet_results = scraper.get_meet_results(ScraperConstants.Men, meet_id)
        except Exception:
            traceback.print_exc()
            return

        # Get the scores of every person who competed in the meet and save them
        # For each event in the meet's results
        for event_index_name in EVENT_NAMES_DICT:
            # Get each score for the event
            for score_data in meet_results[event_index_name]:
                # Look up the gymnast who had the score
                gymnast = Gymnast.objects.filter(rtn_id=score_data['gid']).first()

                # Create a new score object
                score = Score(event=EVENT_NAMES_DICT[event_index_name],
                              score=float(score_data['score']), gymnast=gymnast,
                              date=day, meet=meet_name, week=options['week'])

                # Check if the score already exists in the database
                if Score.objects.filter(gymnast=gymnast, date=day,
                                        event=EVENT_NAMES_DICT[event_index_name],
                                        week=options['week']).exists():
                    num_skipped = num_skipped + 1
                else:
                    scores.append(score)

    # Save new scores to the database, folding each into the gymnast's running average
    for score in scores:
        average = Average.objects.get(gymnast=score.gymnast, event=score.event)
        average.number_of_scores += 1
        average.score = ((average.score * (average.number_of_scores - 1))
                         + decimal.Decimal(score.score)) / average.number_of_scores
        average.save()
        score.save()

    print("")
    print("------ RESULTS ------")
    print("Added %s scores" % len(scores))
    print("Skipped %s existing scores" % num_skipped)
    print("Took %s ms" % (round(time.time() * 1000) - start_time))
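# The running-average update above uses the standard incremental-mean identity
# (with n already incremented): new_mean = (old_mean * (n - 1) + x) / n.
# A minimal standalone check of that identity, independent of the models:
from decimal import Decimal

def updated_mean(old_mean, n, x):
    """Mean of n scores, given the mean of the first n - 1 scores and the new score x."""
    return (old_mean * (n - 1) + x) / n

# Mean of (13.1, 14.9) is 14.0; adding another 14.9 gives 42.9 / 3 = 14.3
assert updated_mean(Decimal("14.0"), 3, Decimal("14.9")) == Decimal("14.3")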
def default():
    scraper = Scraper()
    return scraper.scrap()
def test_get_baths():
    scraper = Scraper()
    assert scraper.get_baths(soup) == 3
def test_get_sqm():
    scraper = Scraper()
    assert scraper.get_sqm(soup) == 232.26
def test_get_lot_size():
    scraper = Scraper()
    assert scraper.get_lot_size(soup) == 0.115
def in_lineup_current_week(gymnast, team):
    scraper = Scraper()
    current_week = int(
        scraper.get_current_and_max_week(ScraperConstants.Men, datetime.now().year)['week'])
    return gymnast.LineUp.filter(week=current_week, team=team).exists()
def receive(self, text_data):
    text_data_json = json.loads(text_data)
    gymnast_pk = text_data_json['gymnast_pk']
    user = self.scope['user']

    # Get the league
    league = League.objects.filter(pk=self.league_pk).first()

    # Get the user's team
    team = FantasyTeam.objects.filter(user=user, league=self.league_pk).first()

    # Get the position that is currently up to draft
    currently_drafting = league.currently_drafting

    # Check if the user who sent the draft request is currently up
    if team.draft_position == currently_drafting and not league.draft_complete and league.draft_started:
        gymnast = get_object_or_404(Gymnast, pk=gymnast_pk)
        if gymnast not in league.drafted.all() and len(team.roster.all()) < league.roster_size:
            team.roster.add(gymnast)
            league = team.league
            league.drafted.add(gymnast)

            # Snake draft, initially going down
            num_teams = len(FantasyTeam.objects.filter(league=self.league_pk))
            if league.going_down:
                # If the last person is drafting
                if league.currently_drafting == (num_teams - 1):
                    # Give them another turn and start going up
                    league.going_down = False
                else:
                    league.currently_drafting = league.currently_drafting + 1
            else:
                # If the first person is drafting on the way back up
                if league.currently_drafting == 0:
                    # Give the first person another turn and start going down
                    league.going_down = True
                else:
                    league.currently_drafting = league.currently_drafting - 1
            league.save()

            if len(league.drafted.all()) == league.roster_size * num_teams:
                # Drafting is done
                scraper = Scraper()
                year = datetime.date.today().year
                num_weeks = int(scraper.get_current_and_max_week(ScraperConstants.Men, year)['max'])
                matchups = round_robin_matchups(num_teams, num_weeks)
                team_pks = [x.pk for x in list(FantasyTeam.objects.filter(league__pk=self.league_pk))]

                # Create matchups for the entire season
                for week in matchups:
                    for matchup in matchups[week]:
                        team1_pk = team_pks[matchup[0] - 1]
                        team2_pk = team_pks[matchup[1] - 1]
                        team1 = FantasyTeam.objects.filter(pk=team1_pk).first()
                        team2 = FantasyTeam.objects.filter(pk=team2_pk).first()
                        m = Matchup(team1=team1, team2=team2, league=league, week=week)
                        m.save()

                        # Create lineups for the entire season
                        events = ['FX', 'PH', 'SR', 'VT', 'PB', 'HB']
                        for i in range(6):
                            if not LineUp.objects.filter(team=team1, event=events[i], week=week).exists():
                                LineUp.objects.create(team=team1, event=events[i], week=week)
                            if not LineUp.objects.filter(team=team2, event=events[i], week=week).exists():
                                LineUp.objects.create(team=team2, event=events[i], week=week)

                league.draft_complete = True
                async_to_sync(self.channel_layer.group_send)(self.draft_group, {
                    'type': 'draft_complete',
                })

            # PERFORM CHECK AND AUTO DRAFT HERE
            league.save()

            # Send a message to the rest of the draft group
            async_to_sync(self.channel_layer.group_send)(self.draft_group, {
                'type': 'gymnast_drafted',
                'gymnast_pk': gymnast_pk,
                'gymnast_name': gymnast.name,
                'team_pk': team.pk,
                'team_name': team.name,
                'ncaa_team_name': gymnast.team,
                'position_currently_drafting': league.currently_drafting,
            })
        else:
            print("DRAFTING ERROR")
            async_to_sync(self.channel_layer.group_send)(self.draft_group, {
                'type': 'gymnast_draft_error',
                'error': 'Gymnast has already been drafted'
            })
    else:
        async_to_sync(self.channel_layer.group_send)(self.draft_group, {
            'type': 'gymnast_draft_error',
            'error': 'Not your turn to draft'
        })
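# A minimal sketch of the snake-draft pointer update used in receive() above,
# pulled out as a pure function for illustration (the helper name and the 4-team
# example are assumptions, not from the source):
def next_pick(position, going_down, num_teams):
    """Return the (position, going_down) pair after one pick."""
    if going_down:
        if position == num_teams - 1:
            return position, False  # last team picks twice, then the order reverses
        return position + 1, True
    if position == 0:
        return position, True       # first team picks twice on the way back down
    return position - 1, False

# With 4 teams, the first eight picks go 0 1 2 3 3 2 1 0
pos, down = 0, True
order = []
for _ in range(8):
    order.append(pos)
    pos, down = next_pick(pos, down, 4)
assert order == [0, 1, 2, 3, 3, 2, 1, 0]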