Example No. 1
    def mine(self, date: datetime):
        """ If that date hasn't been scraped before, scrape it! """
        date_string = date.strftime('%d-%m-%Y')
        # Switch on the engine
        m = Scraper(date=date, session=self._session, server=self._server)

        # Been there, done that
        if date in self._miners:
            self._rec('{} has already been mined', date_string)
            m.close()

        else:
            # Go browse the web summary page for that day
            # and scrape off the job uuid request parameters.
            jobs = m.scrape_uuids()

            # I don't work on weekends
            if not jobs:
                self._rec('No jobs found for {}', date_string)

            else:
                for j in jobs:
                    # Grab the job's web page, regex it and store
                    # the collected fields in a sensible manner.
                    # We don't pickle the data yet: instead, we
                    # pickle multiple days at once before exit.
                    soup = m._get_job(j)
                    raw_data = m._scrape_job(soup)
                    m.process_job(raw_data)

                    # So wanna see results?
                    pp = PrettyPrinter()
                    pp.pprint(m.raw_data[0])                    # Job details
                    pp.pprint(m.raw_data[1])                    # Price table
                    for d in m.raw_data[2]:                    # Addresses
                        pp.pprint(d)

                # We're never gonna scrape with a 100% success
                # rate, but let's do better next time!
                # TODO Hopefully remove this debug message later
                self._rec('Mined: {} successfully!', date_string)
                for message in m._warnings:
                    self._rec(message)
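
For reference, the method keys its bookkeeping on the datetime object itself and only builds the dd-mm-yyyy string for log messages. Below is a minimal, runnable sketch of that duplicate-date check; here "mined" stands in for self._miners, and adding the date back into it is an assumption about code not shown in the example.

from datetime import datetime

# Standalone sketch of the "been there, done that" check above.
# 'mined' plays the role of self._miners (assumption); how the real
# collection is populated is not shown in the original snippet.
mined = set()

run_date = datetime(2021, 3, 15)
date_string = run_date.strftime('%d-%m-%Y')   # -> '15-03-2021'

if run_date in mined:
    print('{} has already been mined'.format(date_string))
else:
    mined.add(run_date)
    print('Mined: {} successfully!'.format(date_string))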