Example #1
    def _get_fighter_name_and_link(self) -> Dict[str, str]:
        fighter_name_and_link = {}
        fighter_name = ""

        l = len(self.fighter_group_urls)
        print("Scraping all fighter names and links: ")
        print_progress(0, l, prefix="Progress:", suffix="Complete")

        for index, fighter_group_url in enumerate(self.fighter_group_urls):
            soup = make_soup(fighter_group_url)
            table = soup.find("tbody")
            names = table.findAll("a", {"class": "b-link b-link_style_black"},
                                  href=True)
            # Anchors arrive in groups of three: the first two hold the
            # fighter's name parts, and every third carries the profile
            # link, stored under the accumulated full name.
            for i, name in enumerate(names):
                if (i + 1) % 3 != 0:
                    if fighter_name == "":
                        fighter_name = name.text
                    else:
                        fighter_name = fighter_name + " " + name.text
                else:
                    fighter_name_and_link[fighter_name] = name["href"]
                    fighter_name = ""
            print_progress(index + 1, l, prefix="Progress:", suffix="Complete")

        return fighter_name_and_link

    @classmethod
    def _get_total_fight_stats(
            cls, event_and_fight_links: Dict[str, List[str]]) -> str:
        total_stats = ""

        l = len(event_and_fight_links)
        print(f'Scraping data for {l} fights: ')
        print_progress(0, l, prefix="Progress:", suffix="Complete")

        for index, (event, fights) in enumerate(event_and_fight_links.items()):
            event_soup = make_soup(event)
            event_info = FightDataScraper._get_event_info(event_soup)

            # Get data for each fight in the event in parallel.
            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=8) as executor:
                futures = []
                for fight in fights:
                    futures.append(
                        executor.submit(FightDataScraper._get_fight_stats_task,
                                        self=cls,
                                        fight=fight,
                                        event_info=event_info))
                for future in concurrent.futures.as_completed(futures):
                    fighter_stats = future.result()
                    if fighter_stats != "":
                        if total_stats == "":
                            total_stats = fighter_stats
                        else:
                            total_stats = total_stats + "\n" + fighter_stats
                    print_progress(index + 1,
                                   l,
                                   prefix="Progress:",
                                   suffix="Complete")

        return total_stats
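
    # _get_fight_stats_task is submitted to the executor above but not shown
    # in this example. A minimal sketch, assuming it mirrors the serial loop
    # in Example #5: scrape one fight page and return its stats row, or an
    # empty string on failure. The method name and parameters are taken from
    # the executor.submit call; the body is an assumption.
    def _get_fight_stats_task(self, fight: str, event_info: str) -> str:
        try:
            fight_soup = make_soup(fight)
            fight_stats = FightDataScraper._get_fight_stats(fight_soup)
            fight_details = FightDataScraper._get_fight_details(fight_soup)
            result_data = FightDataScraper._get_fight_result_data(fight_soup)
        except Exception:
            # Skip fights whose pages fail to parse or scrape.
            return ""
        return ";".join([fight_stats, fight_details, event_info, result_data])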

    @staticmethod
    def get_fight_links(event_links: List[str]) -> Dict[str, List[str]]:
        event_and_fight_links = {}

        l = len(event_links)
        print("Scraping event and fight links: ")
        print_progress(0, l, prefix="Progress:", suffix="Complete")

        for index, link in enumerate(event_links):
            event_fights = []
            soup = make_soup(link)
            for row in soup.findAll(
                    "tr",
                    {
                        "class":
                        "b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click"
                    },
            ):
                href = row.get("data-link")
                event_fights.append(href)
            event_and_fight_links[link] = event_fights

            print_progress(index + 1,
                           l,
                           prefix="Progress:",
                           suffix="Complete")

        return event_and_fight_links
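
# The helpers used throughout these examples are not shown. A minimal sketch,
# assuming make_soup fetches a page into BeautifulSoup and print_progress
# draws a simple in-place progress bar; the real implementations may differ.
import sys

import requests
from bs4 import BeautifulSoup


def make_soup(url: str) -> BeautifulSoup:
    # Fetch the page and parse it with the standard library HTML parser.
    response = requests.get(url)
    return BeautifulSoup(response.text, "html.parser")


def print_progress(iteration: int, total: int, prefix: str = "",
                   suffix: str = "") -> None:
    # Redraw a single terminal line: "prefix |####----| 50.0% suffix".
    percent = 100 * iteration / float(total)
    filled = 40 * iteration // total
    bar = "#" * filled + "-" * (40 - filled)
    sys.stdout.write(f"\r{prefix} |{bar}| {percent:.1f}% {suffix}")
    if iteration == total:
        sys.stdout.write("\n")
    sys.stdout.flush()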
Example #4
    def _get_fighter_name_and_details(
            self, fighter_name_and_link: Dict[str, str]) -> None:
        fighter_name_and_details = {}

        l = len(fighter_name_and_link)
        print("Scraping all fighter data: ")
        print_progress(0, l, prefix="Progress:", suffix="Complete")

        for index, (fighter_name,
                    fighter_url) in enumerate(fighter_name_and_link.items()):
            another_soup = make_soup(fighter_url)
            divs = another_soup.findAll(
                "li",
                {
                    "class":
                    "b-list__box-list-item b-list__box-list-item_type_block"
                },
            )
            data = []
            for i, div in enumerate(divs):
                if i == 9:
                    # An empty string is scraped here, let's not append that
                    continue
                text = div.text.replace("  ", "").replace("\n", "")
                for label in ("Height:", "Weight:", "Reach:", "STANCE:",
                              "DOB:", "SLpM:", "Str. Acc.:", "SApM:",
                              "Str. Def:", "TD Avg.:", "TD Acc.:",
                              "TD Def.:", "Sub. Avg.:"):
                    text = text.replace(label, "")
                data.append(text)

            fighter_name_and_details[fighter_name] = data
            print_progress(index + 1, l, prefix="Progress:", suffix="Complete")

        fighters_with_no_data = []
        for name, details in fighter_name_and_details.items():
            if len(details) != len(self.HEADER):
                fighters_with_no_data.append(name)

        for name in fighters_with_no_data:
            fighter_name_and_details.pop(name)

        if not fighter_name_and_details:
            print("No new fighter data to scrape at the moment!")
            return

        self.new_fighters_exists = True

        # dump fighter_name_and_details as scraped_fighter_data_dict
        with open(self.SCRAPED_FIGHTER_DATA_DICT_PICKLE_PATH.as_posix(),
                  "wb") as f:
            pickle.dump(fighter_name_and_details, f)
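
# To consume the pickle written above, load it back from the same path. A
# minimal sketch; "scraper" stands in for an instance of the class these
# methods belong to and is a hypothetical name, not one from the original.
import pickle

with open(scraper.SCRAPED_FIGHTER_DATA_DICT_PICKLE_PATH.as_posix(),
          "rb") as f:
    fighter_name_and_details = pickle.load(f)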
Example #5
    def _get_fighter_name_and_details(
            self, fighter_name_and_link: Dict[str, str]) -> None:
        fighter_name_and_details = {}

        l = len(fighter_name_and_link)
        print(f'Scraping data for {l} fighters: ')

        # Get fighter data in parallel.
        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
            futures = []
            for index, (fighter_name, fighter_url) in enumerate(
                    fighter_name_and_link.items()):
                futures.append(
                    executor.submit(
                        FighterDetailsScraper._get_fighter_data_task,
                        self=self,
                        fighter_name=fighter_name,
                        fighter_url=fighter_url))
            idx_progress = 0
            print_progress(0, l, prefix="Progress:", suffix="Complete")
            for future in concurrent.futures.as_completed(futures):
                fighter_name, fighter_details = future.result()
                fighter_name_and_details[fighter_name] = fighter_details
                print_progress(idx_progress + 1,
                               l,
                               prefix="Progress:",
                               suffix="Complete")
                idx_progress += 1

        fighters_with_no_data = []
        for name, details in fighter_name_and_details.items():
            if len(details) != len(self.HEADER):
                fighters_with_no_data.append(name)

        for name in fighters_with_no_data:
            fighter_name_and_details.pop(name)

        if not fighter_name_and_details:
            print("No new fighter data to scrape at the moment!")
            return

        self.new_fighters_exists = True

        # dump fighter_name_and_details as scraped_fighter_data_dict
        with open(self.SCRAPED_FIGHTER_DATA_DICT_PICKLE_PATH.as_posix(),
                  "wb") as f:
            pickle.dump(fighter_name_and_details, f)
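
    # _get_fighter_data_task is submitted to the executor above but not shown
    # in this example. A minimal sketch, assuming it scrapes one fighter page
    # the way the serial loop in Example #4 does and returns a
    # (fighter_name, details) tuple, which is how the call site unpacks
    # future.result(). Assumes "from typing import Tuple" at module level;
    # the body is an assumption.
    def _get_fighter_data_task(self, fighter_name: str,
                               fighter_url: str) -> Tuple[str, List[str]]:
        soup = make_soup(fighter_url)
        divs = soup.findAll(
            "li",
            {
                "class":
                "b-list__box-list-item b-list__box-list-item_type_block"
            },
        )
        data = []
        for i, div in enumerate(divs):
            if i == 9:
                # An empty string is scraped here, so skip it.
                continue
            text = div.text.replace("  ", "").replace("\n", "")
            for label in ("Height:", "Weight:", "Reach:", "STANCE:", "DOB:",
                          "SLpM:", "Str. Acc.:", "SApM:", "Str. Def:",
                          "TD Avg.:", "TD Acc.:", "TD Def.:", "Sub. Avg.:"):
                text = text.replace(label, "")
            data.append(text)
        return fighter_name, data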

    @classmethod
    def _get_total_fight_stats(
            cls, event_and_fight_links: Dict[str, List[str]]) -> str:
        total_stats = ""

        l = len(event_and_fight_links)
        print("Scraping all fight data: ")
        print_progress(0, l, prefix="Progress:", suffix="Complete")

        for index, (event, fights) in enumerate(event_and_fight_links.items()):
            event_soup = make_soup(event)
            event_info = FightDataScraper._get_event_info(event_soup)

            for fight in fights:
                try:
                    fight_soup = make_soup(fight)
                    fight_stats = FightDataScraper._get_fight_stats(fight_soup)
                    fight_details = FightDataScraper._get_fight_details(fight_soup)
                    result_data = FightDataScraper._get_fight_result_data(fight_soup)
                except Exception:
                    # Skip fights whose pages fail to parse or scrape.
                    continue

                total_fight_stats = (
                    fight_stats
                    + ";"
                    + fight_details
                    + ";"
                    + event_info
                    + ";"
                    + result_data
                )

                if total_stats == "":
                    total_stats = total_fight_stats
                else:
                    total_stats = total_stats + "\n" + total_fight_stats

            print_progress(index + 1, l, prefix="Progress:", suffix="Complete")

        return total_stats
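
# The string returned above holds one semicolon-separated row per fight. A
# minimal usage sketch, assuming event_and_fight_links comes from
# get_fight_links above; the pandas post-processing is an assumption, not
# part of the original pipeline.
from io import StringIO

import pandas as pd

total_stats = FightDataScraper._get_total_fight_stats(event_and_fight_links)
df = pd.read_csv(StringIO(total_stats), sep=";", header=None)
df.to_csv("total_fight_data.csv", index=False)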