Esempio n. 1
0
    def run(cls, req: Requester) -> Iterator[Clinic]:
        """Lazily yield one parsed clinic per profile link, state by state.

        The page at ``cls.base_url`` is handed to ``cls._get_states`` to obtain
        state URLs; each state page goes to ``cls._get_location`` for location
        URLs; each location page goes to ``cls._get_clinics`` for
        ``(profile_url, name)`` pairs; every profile page is then passed to
        ``cls._parse_profile`` and the result is yielded.

        Progress messages, each starting with a carriage return, are written
        to ``sys.stdout``. Because this is a generator, nothing is fetched or
        printed until the caller starts iterating.

        Args:
            req: Object used to fetch pages through ``get_page_bs``.

        Yields:
            The value returned by ``cls._parse_profile`` for each profile.
        """
        sys.stdout.write(f'\r{cls.company_name_upper}: Processing.')

        state_urls: list[str] = cls._get_states(req.get_page_bs(cls.base_url))
        states_len: int = len(state_urls)
        # NOTE: the final message calls this "clinics", but it tallies the
        # location pages visited, not the profiles actually yielded.
        total_location_count: int = 0

        for state_count, state_url in enumerate(state_urls, start=1):
            sys.stdout.write(
                f'\r{cls.company_name_upper}: Processing state {state_count}/{states_len}.'
            )

            state_page = req.get_page_bs(state_url)
            location_urls: list[str] = cls._get_location(state_page)
            locations_len: int = len(location_urls)
            total_location_count += locations_len

            for location_count, location_url in enumerate(location_urls,
                                                          start=1):
                sys.stdout.write(
                    f'\r{cls.company_name_upper}: Processing state {state_count}/{states_len} and location {location_count}/{locations_len}.'
                )

                location_page = req.get_page_bs(location_url)
                for profile_url, name in cls._get_clinics(location_page):
                    profile_page = req.get_page_bs(profile_url)
                    yield cls._parse_profile(profile_page, name, profile_url)

        sys.stdout.write(
            f'\r{cls.company_name_upper}: Processed {states_len} states to find {total_location_count} clinics.\n'
        )
    def run(cls, req: Requester) -> Iterator[Clinic]:
        """Lazily yield one clinic per location URL, community by community.

        The page at ``cls.test_urls['communities'][1]`` is passed to
        ``cls._get_communities``, which returns one list of location URLs per
        community. Each location page is fetched and handed, together with
        its URL, to ``cls._get_clinic``; that result is yielded.

        Progress messages, each starting with a carriage return, are written
        to ``sys.stdout``. Nothing is fetched or printed until the caller
        starts iterating.

        Args:
            req: Object used to fetch pages through ``get_page_bs``.

        Yields:
            The value returned by ``cls._get_clinic`` for each location URL.
        """
        sys.stdout.write(f'\r{cls.company_name_upper}: Processing.')

        index_page = req.get_page_bs(cls.test_urls['communities'][1])
        community_groups: list[list[str]] = cls._get_communities(index_page)
        communities_len: int = len(community_groups)
        total_location_count: int = 0

        for communities_count, community_urls in enumerate(community_groups,
                                                           start=1):
            sys.stdout.write(
                f'\r{cls.company_name_upper}: Processing community {communities_count}/{communities_len}.'
            )

            locations_len: int = len(community_urls)
            for location_count, url in enumerate(community_urls, start=1):
                sys.stdout.write(
                    f'\r{cls.company_name_upper}: Processing community {communities_count}/{communities_len} and location {location_count}/{locations_len}.'
                )
                yield cls._get_clinic(req.get_page_bs(url), url)

            # Tally only after the whole community has been walked.
            total_location_count += locations_len

        sys.stdout.write(
            f'\r{cls.company_name_upper}: Processed {communities_len} communities to find {total_location_count} clinics.\n'
        )
class TestRequesterClass(unittest.TestCase):
    """Smoke test for ``Requester`` that fetches https://www.google.com.

    NOTE(review): this performs real network requests, so it needs
    connectivity to pass.
    """

    # Requester instance rebuilt by setUp.
    _req: Requester

    def setUp(self):
        """Create the ``Requester`` under test."""
        # NOTE(review): 0.2 is presumably a request delay in seconds — confirm
        # against the Requester constructor.
        self._req = Requester(0.2)

    def test_valid(self):
        """Both page getters return something other than ``None``."""
        page_text = self._req.get_page_str('https://www.google.com')
        self.assertIsNotNone(page_text)

        page_soup = self._req.get_page_bs('https://www.google.com')
        self.assertIsNotNone(page_soup)
    def run(cls, req: Requester) -> Iterator[Clinic]:
        """Lazily yield one clinic per entry returned by the location search.

        A single POST is sent to ``https://manage.fyzical.com/locationsearch``
        with the fixed form fields ``lng=-100``, ``lat=40`` and
        ``radius=5000``. Every item under the response's ``'data'`` key is
        passed to ``cls._get_profile`` and the result is yielded.

        Progress messages, each starting with a carriage return, are written
        to ``sys.stdout``. Nothing is requested or printed until the caller
        starts iterating.

        Args:
            req: Object used to issue the request through ``form_json``.

        Yields:
            The value returned by ``cls._get_profile`` for each location.

        Raises:
            KeyError: If the response mapping has no ``'data'`` key.
        """
        sys.stdout.write(f'\r{cls.company_name_upper}: Processing.')

        resp = req.form_json('post',
                             'https://manage.fyzical.com/locationsearch',
                             data={
                                 'lng': -100,
                                 'lat': 40,
                                 'radius': 5000,
                             })
        locations = resp['data']

        locations_len: int = len(locations)
        total_location_count: int = locations_len

        # Number locations from 1 so progress reads "1/N" .. "N/N". The counter
        # used to be initialised to 0 and never advanced, so every line said
        # "0/N".
        for location_count, location in enumerate(locations, start=1):
            sys.stdout.write(
                f'\r{cls.company_name_upper}: Processing location {location_count}/{locations_len}.'
            )
            yield cls._get_profile(location)

        sys.stdout.write(
            f'\r{cls.company_name_upper}: Processed {total_location_count} clinics.\n'
        )
Esempio n. 5
0
    def run(cls, req: Requester) -> Iterator[Clinic]:
        """Lazily yield one clinic per entry returned by the store search.

        A single GET is sent to ``{cls.base_url}/wp-admin/admin-ajax.php?``
        with the fixed query parameters shown below (``action=store_search``
        and so on). Every item of the response is passed to
        ``cls._get_profile`` and the result is yielded.

        Progress messages, each starting with a carriage return, are written
        to ``sys.stdout``. Nothing is requested or printed until the caller
        starts iterating.

        Args:
            req: Object used to issue the request through ``form_json``.

        Yields:
            The value returned by ``cls._get_profile`` for each location.
        """
        sys.stdout.write(f'\r{cls.company_name_upper}: Processing.')

        locations = req.form_json('get',
                                  f'{cls.base_url}/wp-admin/admin-ajax.php?',
                                  params={
                                      'action': 'store_search',
                                      'lat': 30,
                                      'long': -80,
                                      'max_results': 25,
                                      'search_radius': 50,
                                      'autoload': 1,
                                  })

        locations_len: int = len(locations)
        total_location_count: int = locations_len

        # Number locations from 1 so progress reads "1/N" .. "N/N". The counter
        # used to be initialised to 0 and never advanced, so every line said
        # "0/N".
        for location_count, location in enumerate(locations, start=1):
            sys.stdout.write(
                f'\r{cls.company_name_upper}: Processing location {location_count}/{locations_len}.'
            )
            yield cls._get_profile(location)

        sys.stdout.write(
            f'\r{cls.company_name_upper}: Processed {total_location_count} clinics.\n'
        )
Esempio n. 6
0
    def run(cls, req: Requester) -> Iterator[Clinic]:
        """Lazily yield one clinic per location URL, state by state.

        The page at ``cls.test_urls['states'][1]`` is passed to
        ``cls._get_states``. For each returned state a ``markersearch`` POST
        is sent to ``{cls.base_url}/wp-admin/admin-ajax.php?`` and its
        response is given to ``cls._get_locations`` to obtain location URLs.
        Each location page is fetched and handed, together with its URL, to
        ``cls._get_profile``; that result is yielded.

        Progress messages, each starting with a carriage return, are written
        to ``sys.stdout``. Nothing is fetched or printed until the caller
        starts iterating.

        Args:
            req: Object used for ``get_page_bs`` and ``form_json`` requests.

        Yields:
            The value returned by ``cls._get_profile`` for each location URL.
        """
        sys.stdout.write(f'\r{cls.company_name_upper}: Processing.')

        states_page = req.get_page_bs(cls.test_urls['states'][1])
        state_names: list[str] = cls._get_states(states_page)
        states_len: int = len(state_names)
        total_location_count: int = 0

        for state_count, state in enumerate(state_names, start=1):
            sys.stdout.write(
                f'\r{cls.company_name_upper}: Processing state {state_count}/{states_len}.'
            )

            search_result = req.form_json(
                'post',
                f'{cls.base_url}/wp-admin/admin-ajax.php?',
                data={
                    'action': 'markersearch',
                    'method': 'state',
                    'state': state,
                })
            location_urls: list[str] = cls._get_locations(search_result)

            locations_len: int = len(location_urls)
            total_location_count += locations_len

            for location_count, location_url in enumerate(location_urls,
                                                          start=1):
                sys.stdout.write(
                    f'\r{cls.company_name_upper}: Processing state {state_count}/{states_len} and location {location_count}/{locations_len}.'
                )
                yield cls._get_profile(req.get_page_bs(location_url),
                                       location_url)

        sys.stdout.write(
            f'\r{cls.company_name_upper}: Processed {states_len} states to find {total_location_count} clinics.\n'
        )
Esempio n. 7
0
    def run(cls, req: Requester) -> Iterator[Clinic]:
        """Lazily yield every clinic listed on each state page.

        The page at ``cls.states_url`` is passed to ``cls._get_states``, which
        returns ``(state_url, state)`` pairs. Each state page is fetched and
        handed, together with ``state``, to ``cls._get_clinics``; the clinics
        it returns are yielded one by one.

        Progress messages, each starting with a carriage return, are written
        to ``sys.stdout``. Nothing is fetched or printed until the caller
        starts iterating.

        Args:
            req: Object used to fetch pages through ``get_page_bs``.

        Yields:
            Each item of the lists returned by ``cls._get_clinics``.
        """
        sys.stdout.write(f'\r{cls.company_name_upper}: Processing.')

        state_entries: list[tuple[str, str]] = cls._get_states(
            req.get_page_bs(cls.states_url))
        states_len: int = len(state_entries)
        total_location_count: int = 0

        for state_count, (state_url, state) in enumerate(state_entries,
                                                         start=1):
            sys.stdout.write(
                f'\r{cls.company_name_upper}: Processing state {state_count}/{states_len}.'
            )

            state_page = req.get_page_bs(state_url)
            state_clinics: list[Clinic] = cls._get_clinics(state_page, state)
            total_location_count += len(state_clinics)
            for found_clinic in state_clinics:
                yield found_clinic

        sys.stdout.write(
            f'\r{cls.company_name_upper}: Processed {states_len} states to find {total_location_count} clinics.\n'
        )
Esempio n. 8
0
    def run(cls, req: Requester) -> Iterator[Clinic]:
        """Lazily yield one clinic per record extracted from the base page.

        The page at ``cls.base_url`` is passed to ``cls._get_data``; each
        record it returns is handed to ``cls._get_clinic`` and the result is
        yielded.

        Progress messages, each starting with a carriage return, are written
        to ``sys.stdout``. Nothing is fetched or printed until the caller
        starts iterating.

        Args:
            req: Object used to fetch the page through ``get_page_bs``.

        Yields:
            The value returned by ``cls._get_clinic`` for each record.
        """
        sys.stdout.write(f'\r{cls.company_name_upper}: Processing.')

        base_page = req.get_page_bs(cls.base_url)
        records = cls._get_data(base_page)
        data_len: int = len(records)

        for data_count, record in enumerate(records, start=1):
            sys.stdout.write(
                f'\r{cls.company_name_upper}: Processing location {data_count}/{data_len}.'
            )
            yield cls._get_clinic(record)

        sys.stdout.write(
            f'\r{cls.company_name_upper}: Processed {data_len} clinics.\n')
 def setUp(self):
     """Build a ``Requester`` and store it on ``self._req``."""
     # NOTE(review): 0.2 is presumably a request delay in seconds — confirm
     # against the Requester constructor.
     self._req = Requester(0.2)
 def _get_clinic(cls, req: Requester, url) -> Clinic:
     raw_html: str = req.get_page_str(url)
     return cls._get_clinic_info(raw_html, url)
 def _get_locations(cls, req: Requester, url: str) -> [str]:
     raw_html: str = req.get_page_str(url)
     return cls._get_location_urls(raw_html)
Esempio n. 12
0
from pathlib import Path

from ptls.requester import Requester
from ptls.args import Args, get_args

# Run configuration from get_args (presumably command-line parsing — confirm);
# './data/test_files' is passed as the default location.
args: Args = get_args(default_location='./data/test_files')
# Shared requester used by download(), constructed with args.network_delay.
req: Requester = Requester(args.network_delay)


def download(file: Path, url: str):
    """Fetch ``url`` with the module-level ``req`` and save it to ``file``.

    The page is fetched before the file is opened, so a failed request does
    not leave an empty or truncated file behind. The file is written in
    binary mode and closed on return, including when the write raises.

    Args:
        file: Destination path; existing content is overwritten.
        url: Address passed to ``req.get_page_str``.
    """
    content = req.get_page_str(url)
    # A binary-mode handle only accepts bytes-like objects, so text is encoded
    # first. NOTE(review): UTF-8 is an assumption — confirm the encoding the
    # test fixtures are expected to use.
    if isinstance(content, str):
        content = content.encode('utf-8')
    with file.open('wb') as handle:
        handle.write(content)


# Download every scraper's test pages into its own sub-directory.
for scraper in args.scrapers:
    print(f'Downloading {scraper.company_name} test files...')

    # One directory per scraper, named after the company, created on demand.
    scraper_path: Path = args.out_location.joinpath(scraper.company_name)
    scraper_path.mkdir(parents=True, exist_ok=True)

    # Each test_urls value unpacks into (file name, URL).
    for file_name, url in scraper.test_urls.values():
        destination: Path = scraper_path.joinpath(file_name)
        download(destination, url)