Ejemplo n.º 1
0
    def download(self, area_name, year, month, page=1):
        """
        Download the page for the given search conditions and save it to disk.

        The target URL, the download file name and the save path are all
        resolved through ``Config``. If the save path already exists the
        download is skipped. Otherwise a request carrying the form data built
        by ``self.get_form_data`` is sent, and the raw response bytes are
        written to the save path when ``self.page_found`` is truthy.

        NOTE(review): the original docstring said the page is saved "to CSV",
        but this method writes the response bytes unchanged — confirm the
        intended wording.

        Args:
            area_name: Area identifier passed to ``Config.get_url`` and
                ``Config.get_download_file``.
            year: Year of the search condition.
            month: Month of the search condition.
            page: Page number of the search result (defaults to 1).

        Returns:
            None.
        """
        download_url = Config.get_url(area_name)
        download_file = Config.get_download_file(area_name, year, month, page)
        save_path = Config.get_download_path(download_file)
        if os.path.exists(save_path):
            _logger.info(
                "skip download for file exist {}".format(download_file))
            return

        form_data = self.get_form_data(year, month, page)
        req = urllib.request.Request(download_url, form_data)
        try:
            # Use the response as a context manager so the connection is
            # closed even when read() raises part-way through.
            with urllib.request.urlopen(req) as response:
                html_data = response.read()
        except IncompleteRead as e:
            # Best effort: keep whatever was received before the read broke.
            html_data = e.partial
        # Wait between requests (self.crawl_interval seconds).
        time.sleep(self.crawl_interval)

        # presumably check_html_no_data updates self.page_found — verify
        # against its definition.
        self.check_html_no_data(html_data)
        if self.page_found:
            with open(save_path, mode="wb") as f:
                f.write(html_data)
            _logger.info("save {}".format(download_file))
Ejemplo n.º 2
0
def test_get_path():
    """Check that each Config path/URL helper returns a truthy value."""
    datastore_path = Config.get_datastore_path("choka.csv")
    assert datastore_path

    download_path = Config.get_download_path("choka_daikoku_2021_04_001.html")
    assert download_path

    resource_path = Config.test_resource("daikoku1.html")
    assert resource_path

    area_url = Config.get_url("daikoku")
    assert area_url

    download_file = Config.get_download_file("daikoku", 2021, 4)
    assert download_file

    db_path = Config.get_db_path()
    assert db_path

    config_path = Config.get_config_path("config.toml")
    assert config_path
Ejemplo n.º 3
0
def test_get_download_file():
    """Check the file name produced for area "daikoku", 2021, month 4."""
    expected_name = "choka_daikoku_2021_04_001.html"
    actual_name = Config.get_download_file("daikoku", 2021, 4)
    assert actual_name == expected_name