Code Example #1
def test_write(tmpdir):
    from civic_scraper.base.cache import Cache

    cache = Cache(tmpdir)
    content = "<h1>some content</h1>"
    file_path = "html/search_results_page.html"
    outfile = cache.write(file_path, content)
    scrape_dir = tmpdir.join("html")
    files = [f.basename for f in scrape_dir.listdir()]
    assert "search_results_page.html" in files
    actual_contents = file_contents(outfile)
    assert actual_contents == content
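The file_contents helper used above is not shown in these excerpts; it is presumably a small test utility that reads a file back as text, along the lines of the sketch below (the name comes from the test, the implementation is assumed).

def file_contents(path):
    "Hypothetical stand-in for the file_contents test helper: read a file as text."
    with open(path) as infile:
        return infile.read()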
Code Example #2
def test_scrape_cache_false_default(tmpdir):
    "Scrape should not cache search results pages by default"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-03"
    end_date = "2020-05-06"
    cp.scrape(start_date, end_date)
    actual_files = [f.basename for f in tmpdir.listdir()]
    assert actual_files == []
Code Example #3
def test_scrape_download_default(tmpdir):
    "Scraper should not download file assets by default"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-05"
    end_date = "2020-05-05"
    cp.scrape(
        start_date,
        end_date,
    )
    target_dir = tmpdir.join("assets")
    assert not target_dir.exists()
Code Example #4
def test_scrape_current_day_by_default(today_local_str, tmpdir):
    "Scrape should assume current day be default"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    cp.scrape(download=True)
    target_dir = tmpdir.join("assets")
    actual_files = set([f.basename for f in target_dir.listdir()])
    expected = set([
        'civicplus_nc-nashcounty_05052020-382_minutes.pdf',
        'civicplus_nc-nashcounty_05052020-382_agenda.pdf'
    ])
    assert actual_files == expected
Code Example #5
def test_scrape_download_filter_type(tmpdir):
    "Downloads should be filterable by type"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-05"
    end_date = "2020-05-05"
    cp.scrape(
        start_date,
        end_date,
        download=True,
        asset_list=["minutes"],
    )
    target_dir = tmpdir.join("assets")
    actual_files = [f.basename for f in target_dir.listdir()]
    expected = ["civicplus_nc-nashcounty_05052020-382_agenda.pdf"]
    assert actual_files == expected
Code Example #6
def test_scrape_download_true(tmpdir):
    "Setting download=True should download file assets"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-05"
    end_date = "2020-05-05"
    cp.scrape(
        start_date,
        end_date,
        download=True,
    )
    target_dir = tmpdir.join("assets")
    actual_files = set([f.basename for f in target_dir.listdir()])
    expected = set([
        "civicplus_nc-nashcounty_05052020-382_minutes.pdf",
        "civicplus_nc-nashcounty_05052020-382_agenda.pdf",
    ])
    assert actual_files == expected
Code Example #7
def test_scrape_download_filter_both(tmpdir):
    "Downloads should be filterable by type and file size"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-05"
    end_date = "2020-05-05"
    # Below, minutes (approx 0.028 MB) will be filtered because its size
    # exceeds the 0.019 MB file_size limit, *and* agenda (approx 0.018 MB)
    # will be filtered because of asset_list
    cp.scrape(
        start_date,
        end_date,
        download=True,
        asset_list=["agenda"],
        file_size=0.019,
    )
    target_dir = tmpdir.join("assets")
    actual_files = [f.basename for f in target_dir.listdir()]
    assert actual_files == []
Code Example #8
def test_scrape_download_filter_size(tmpdir):
    "Downloads should be filterable by size in MB"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-05"
    end_date = "2020-05-05"
    # Byte sizes of the two files for May 5, 2020:
    # - Minutes/_05052020-382 = '28998'
    # - Agenda/_05052020-382 = '19536'
    # 19536 bytes for the agenda, i.e. 0.0186309814453125 MB
    cp.scrape(
        start_date,
        end_date,
        download=True,
        file_size=0.0186309814453125,
    )
    target_dir = tmpdir.join("assets")
    actual_files = [f.basename for f in target_dir.listdir()]
    expected = ["civicplus_nc-nashcounty_05052020-382_agenda.pdf"]
    assert actual_files == expected
Code Example #9
def test_scrape_cache_true(tmpdir):
    "Setting cache to True should trigger caching of search results page"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-03"
    end_date = "2020-05-06"
    cp.scrape(
        start_date,
        end_date,
        cache=True,
    )
    artifacts_path = tmpdir.join("artifacts")
    actual_files = [f.basename for f in artifacts_path.listdir()]
    expected = [
        ("http__nc-nashcounty.civicplus.com__AgendaCenter__Search__QUERY"
         "term=&CIDs=all&startDate=05%2F03%2F2020"
         "&endDate=05%2F06%2F2020&dateRange=&dateSelector=")
    ]
    assert actual_files == expected
    # Spot check contents
    inpath = artifacts_path.join(expected[0])
    contents = file_contents(inpath)
    assert "Board of Commissioners" in contents
Code Example #10
def test_default_cache_dir(monkeypatch):
    target = "civic_scraper.utils.expanduser"
    with patch(target) as mock_method:
        mock_method.return_value = "/Users/you"
        cache = Cache()
        assert cache.path == "/Users/you/.civic-scraper"
Code Example #11
def test_custom_cache_path(tmpdir):
    from civic_scraper.base.cache import Cache

    cache = Cache(tmpdir)
    assert tmpdir == cache.path
Code Example #12
    def scrape(
        self,
        start_date,
        end_date,
        site_urls=[],
        cache=False,
        download=False,
    ):
        """Scrape file metadata and assets for a list of agency sites.

        For a given scraper, scrapes file artifact metadata and
        downloads file artifacts. Automatically generates a metadata
        CSV of file assets.

        If requested, caches intermediate file artifacts such as HTML
        from scraped pages and downloads file assets such as agendas and
        minutes (caching and downloading are optional and off by default).

        Args:

            start_date (str): Start date of scrape (YYYY-MM-DD)
            end_date (str): End date of scrape (YYYY-MM-DD)
            site_urls (list): List of site URLs
            cache (bool): Optionally cache intermediate file artifacts such as HTML
                (default: False)
            download (bool): Optionally download file assets such as agendas (default: False)

        Outputs:
            Metadata CSV listing file assets for given sites and params.

        Returns:
            AssetCollection instance
        """
        asset_collection = AssetCollection()
        cache_obj = Cache(self.cache_path)
        logger.info(
            f"Scraping {len(site_urls)} site(s) from {start_date} to {end_date}..."
        )
        for url in site_urls:
            SiteClass = self._get_site_class(url)
            kwargs = {}
            if cache:
                kwargs["cache"] = cache_obj
            site = SiteClass(url, **kwargs)
            logger.info(f"\t{url}")
            _collection = site.scrape(
                start_date,
                end_date,
                cache=cache,
            )
            asset_collection.extend(_collection)
        metadata_file = asset_collection.to_csv(cache_obj.metadata_files_path)
        logger.info(f"Wrote asset metadata CSV: {metadata_file}")
        if download:
            download_counter = 0
            logger.info(
                f"Downloading {len(asset_collection)} file asset(s) to {cache_obj.assets_path}..."
            )
            for asset in asset_collection:
                # TODO: Add error-handling here
                logger.info(f"\t{asset.url}")
                asset.download(cache_obj.assets_path)
                download_counter += 1
        return asset_collection
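A minimal usage sketch for the scrape method above. The enclosing Runner class, its import path, and the cache_path constructor argument are assumptions inferred from self.cache_path and self._get_site_class; only the keyword arguments come from the signature shown.

# Hypothetical driver for the scrape() method above; the Runner class name,
# import path, and cache_path argument are assumed, not confirmed.
from civic_scraper.runner import Runner  # assumed import path

runner = Runner(cache_path="/tmp/civic-scraper")  # assumed constructor
assets = runner.scrape(
    start_date="2020-05-03",
    end_date="2020-05-06",
    site_urls=["http://nc-nashcounty.civicplus.com/AgendaCenter"],
    cache=True,     # also keep intermediate HTML artifacts
    download=True,  # download agendas/minutes into the cache's assets dir
)
print(len(assets))  # AssetCollection supports len(), per the logging above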
Code Example #13
def test_env_configured_default(monkeypatch):
    "CIVIC_SCRAPER_DIR env var should configure cache"
    monkeypatch.setenv("CIVIC_SCRAPER_DIR", "/tmp/civic-scraper")
    cache = Cache()
    assert cache.path == "/tmp/civic-scraper"
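Taken together, Code Examples #1, #10, #11, and #13 pin down how Cache resolves its base path and writes cached files. The sketch below is merely consistent with those tests, not the actual civic_scraper.base.cache implementation; in particular, the real class resolves the home directory via civic_scraper.utils.expanduser (which Example #10 patches), and its assets/artifacts attributes may be built differently.

# Sketch of a Cache consistent with the tests above (assumed, not the
# library's actual implementation).
import os
from os.path import expanduser
from pathlib import Path


class Cache:
    def __init__(self, path=None):
        if path is None:
            # CIVIC_SCRAPER_DIR wins; otherwise fall back to ~/.civic-scraper
            path = os.environ.get(
                "CIVIC_SCRAPER_DIR",
                os.path.join(expanduser("~"), ".civic-scraper"),
            )
        self.path = path

    @property
    def assets_path(self):
        # Downloaded file assets land in <cache>/assets (see the download tests)
        return os.path.join(self.path, "assets")

    @property
    def artifacts_path(self):
        # Cached search-results HTML lands in <cache>/artifacts (see Example #9)
        return os.path.join(self.path, "artifacts")

    def write(self, name, content):
        # Write content relative to the cache dir, creating parent
        # directories (e.g. "html/") as needed, and return the output path.
        out_path = Path(self.path, name)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(content)
        return str(out_path)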