Example 1
import pandas as pd

# RequestSinglePage and Parser are the helper classes defined earlier.
class Crawler:
    def __init__(self, params):
        self.params = params
        self.log = {"fetching": None, "crawling": None}
        self.results = None
        self.results_df = None  # filled by get_all() once fetching and parsing succeed
        self._request = None
        self._parser = None

    def get_all(self):
        # Fetch the page and keep its log.
        self._request = RequestSinglePage(params=self.params)
        self._request.get()

        self.log["fetching"] = self._request.log

        # Stop early if the request was not successful.
        if not self._request.log.successful:
            self.results = None
            return None

        # Parse the fetched page, then store the results, the parser log
        # and a DataFrame view of the results.
        self._parser = Parser(self._request.page_content)
        self._parser.extract_fields()
        self.results = self._parser.results
        self.log["crawling"] = self._parser._log
        self.results_df = pd.DataFrame(self.results)

    def get_pandas_df(self):
        # Returns None until get_all() has completed successfully.
        return self.results_df
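The Crawler touches only a small part of the two helper classes. As a rough sketch of the interface it assumes (the attribute and method names are taken from the calls above; the bodies are illustrative stand-ins, not the real implementation defined earlier):

from types import SimpleNamespace

class RequestSinglePage:
    """Stand-in: fetches one page of results for the given params."""
    def __init__(self, params):
        self.params = params
        self.log = SimpleNamespace(successful=False)
        self.page_content = None

    def get(self):
        # The real class performs the HTTP request, then sets
        # page_content and log.successful accordingly.
        self.log.successful = True
        self.page_content = "<html>...</html>"

class Parser:
    """Stand-in: extracts one dict of fields per result row."""
    def __init__(self, page_content):
        self.page_content = page_content
        self.results = None
        self._log = None

    def extract_fields(self):
        # The real class fills results with the extracted rows
        # and records what happened in _log.
        self.results = []
        self._log = {"rows_parsed": 0}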
Example 2
# First check the row parser on a single result row
# ('rows' is assumed to hold the result rows extracted in the earlier steps).
single_row = RowParser(rows[0])
single_row.extract_fields()
print(single_row.extracted_content)

print("--------Wyciąganie danych ze wszystkich rezultatów--------------")

# Parse every row and collect the extracted fields.
results = []
for row in rows:
    single_row = RowParser(row)
    single_row.extract_fields()
    results.append(single_row.extracted_content)

print(results)
print(f"length of results: {len(results)}")

print("test nowej klasy")
parser_ = Parser(a.page_content)
parser_.extract_fields()
print(parser_.results)

print(parser_._log)

print("test master obiektu ")

crawler = Crawler(search_params)
crawler.get_all()

print(crawler.results)
print(crawler.log)
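The test above prints the raw results and the log but never calls get_pandas_df(). A minimal follow-up, assuming the fetch succeeded so that results_df is populated, could be:

df = crawler.get_pandas_df()
print(df.head())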