def test_write(tmpdir):
    from civic_scraper.base.cache import Cache

    cache = Cache(tmpdir)
    content = "<h1>some content</h1>"
    file_path = "html/search_results_page.html"
    outfile = cache.write(file_path, content)
    scrape_dir = tmpdir.join("html")
    files = [f.basename for f in scrape_dir.listdir()]
    assert "search_results_page.html" in files
    actual_contents = file_contents(outfile)
    assert actual_contents == content
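# NOTE: file_contents is a small shared test helper (e.g. in conftest.py) that
# these tests rely on but that is not shown here. A minimal sketch of what it
# is assumed to do:
def file_contents(path):
    """Return the text contents of the file at the given path."""
    with open(path, "r") as infile:
        return infile.read()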
def test_scrape_cache_false_default(tmpdir):
    "Scrape should not cache search results pages by default"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-03"
    end_date = "2020-05-06"
    cp.scrape(start_date, end_date)
    actual_files = [f.basename for f in tmpdir.listdir()]
    assert actual_files == []
def test_scrape_download_default(tmpdir):
    "Scraper should not download file assets by default"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-05"
    end_date = "2020-05-05"
    cp.scrape(start_date, end_date)
    target_dir = tmpdir.join("assets")
    assert not target_dir.exists()
def test_scrape_current_day_by_default(today_local_str, tmpdir):
    "Scrape should assume the current day by default"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    cp.scrape(download=True)
    target_dir = tmpdir.join("assets")
    actual_files = {f.basename for f in target_dir.listdir()}
    expected = {
        "civicplus_nc-nashcounty_05052020-382_minutes.pdf",
        "civicplus_nc-nashcounty_05052020-382_agenda.pdf",
    }
    assert actual_files == expected
def test_scrape_download_filter_type(tmpdir):
    "Downloads should be filterable by type"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-05"
    end_date = "2020-05-05"
    # asset_list names the asset types to filter out, so excluding
    # minutes leaves only the agenda to download
    cp.scrape(
        start_date,
        end_date,
        download=True,
        asset_list=["minutes"],
    )
    target_dir = tmpdir.join("assets")
    actual_files = [f.basename for f in target_dir.listdir()]
    expected = ["civicplus_nc-nashcounty_05052020-382_agenda.pdf"]
    assert actual_files == expected
def test_scrape_download_true(tmpdir):
    "Setting download=True should download file assets"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-05"
    end_date = "2020-05-05"
    cp.scrape(
        start_date,
        end_date,
        download=True,
    )
    target_dir = tmpdir.join("assets")
    actual_files = {f.basename for f in target_dir.listdir()}
    expected = {
        "civicplus_nc-nashcounty_05052020-382_minutes.pdf",
        "civicplus_nc-nashcounty_05052020-382_agenda.pdf",
    }
    assert actual_files == expected
def test_scrape_download_filter_both(tmpdir):
    "Downloads should be filterable by type and file size"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-05"
    end_date = "2020-05-05"
    # Below, minutes (~0.028 MB) is filtered out because it exceeds the
    # 0.019 MB file_size cutoff, *and* agenda (~0.018 MB) is filtered out
    # because its type appears in asset_list
    cp.scrape(
        start_date,
        end_date,
        download=True,
        asset_list=["agenda"],
        file_size=0.019,
    )
    target_dir = tmpdir.join("assets")
    actual_files = [f.basename for f in target_dir.listdir()]
    assert actual_files == []
def test_scrape_download_filter_size(tmpdir):
    "Downloads should be filterable by size in MB"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-05"
    end_date = "2020-05-05"
    # Byte sizes of the two files for May 5, 2020:
    # - Minutes/_05052020-382 = 28998 bytes
    # - Agenda/_05052020-382  = 19536 bytes (0.0186309814453125 MB)
    cp.scrape(
        start_date,
        end_date,
        download=True,
        file_size=0.0186309814453125,
    )
    target_dir = tmpdir.join("assets")
    actual_files = [f.basename for f in target_dir.listdir()]
    expected = ["civicplus_nc-nashcounty_05052020-382_agenda.pdf"]
    assert actual_files == expected
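# The MB value in the comment above corresponds to binary megabytes
# (1 MB = 1024 * 1024 bytes). A quick check of the conversion the size
# cutoff relies on:
assert 19536 / (1024 * 1024) == 0.0186309814453125  # agenda sits exactly at the cutoff
assert 28998 / (1024 * 1024) > 0.0186309814453125   # minutes exceeds it and is skipped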
def test_scrape_cache_true(tmpdir):
    "Setting cache to True should trigger caching of search results page"
    url = "http://nc-nashcounty.civicplus.com/AgendaCenter"
    cp = CivicPlusSite(url, cache=Cache(tmpdir))
    start_date = "2020-05-03"
    end_date = "2020-05-06"
    cp.scrape(
        start_date,
        end_date,
        cache=True,
    )
    artifacts_path = tmpdir.join("artifacts")
    actual_files = [f.basename for f in artifacts_path.listdir()]
    expected = [
        (
            "http__nc-nashcounty.civicplus.com__AgendaCenter__Search__QUERY"
            "term=&CIDs=all&startDate=05%2F03%2F2020"
            "&endDate=05%2F06%2F2020&dateRange=&dateSelector="
        )
    ]
    assert actual_files == expected
    # Spot check contents
    inpath = artifacts_path.join(expected[0])
    contents = file_contents(inpath)
    assert "Board of Commissioners" in contents
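# The expected filename above is the search-results URL with its separators
# flattened. A rough sketch of the transformation it implies (not necessarily
# the library's actual implementation):
def url_to_cache_name(url):
    """Turn a search-results URL into a flat, filesystem-safe name."""
    return url.replace("://", "__").replace("/", "__").replace("?", "QUERY")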
def test_default_cache_dir(monkeypatch):
    from unittest.mock import patch

    target = "civic_scraper.utils.expanduser"
    with patch(target) as mock_method:
        mock_method.return_value = "/Users/you"
        cache = Cache()
        assert cache.path == "/Users/you/.civic-scraper"
def test_custom_cache_path(tmpdir):
    from civic_scraper.base.cache import Cache

    cache = Cache(tmpdir)
    assert tmpdir == cache.path
def scrape(
    self,
    start_date,
    end_date,
    site_urls=[],
    cache=False,
    download=False,
):
    """Scrape file metadata and assets for a list of agency sites.

    For a given scraper, scrapes file artifact metadata and downloads file
    artifacts. Automatically generates a metadata CSV of file assets.

    If requested, caches intermediate file artifacts such as HTML from
    scraped pages and downloads file assets such as agendas and minutes
    (caching and downloading are optional and are off by default).

    Args:
        start_date (str): Start date of scrape (YYYY-MM-DD)
        end_date (str): End date of scrape (YYYY-MM-DD)
        site_urls (list): List of site URLs
        cache (bool): Optionally cache intermediate file artifacts such as
            HTML (default: False)
        download (bool): Optionally download file assets such as agendas
            (default: False)

    Outputs:
        Metadata CSV listing file assets for given sites and params.

    Returns:
        AssetCollection instance
    """
    asset_collection = AssetCollection()
    cache_obj = Cache(self.cache_path)
    logger.info(
        f"Scraping {len(site_urls)} site(s) from {start_date} to {end_date}..."
    )
    for url in site_urls:
        SiteClass = self._get_site_class(url)
        kwargs = {}
        if cache:
            kwargs["cache"] = cache_obj
        site = SiteClass(url, **kwargs)
        logger.info(f"\t{url}")
        _collection = site.scrape(
            start_date,
            end_date,
            cache=cache,
        )
        asset_collection.extend(_collection)
    metadata_file = asset_collection.to_csv(cache_obj.metadata_files_path)
    logger.info(f"Wrote asset metadata CSV: {metadata_file}")
    if download:
        download_counter = 0
        logger.info(
            f"Downloading {len(asset_collection)} file asset(s) to {cache_obj.assets_path}..."
        )
        for asset in asset_collection:
            # TODO: Add error-handling here
            logger.info(f"\t{asset.url}")
            asset.download(cache_obj.assets_path)
            download_counter += 1
    return asset_collection
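# A minimal usage sketch for the scrape() method above. The Runner class name,
# its import path, and the cache_path constructor argument are assumptions
# here, not confirmed by this section:
from civic_scraper.runner import Runner

runner = Runner(cache_path="/tmp/civic-scraper")
assets = runner.scrape(
    start_date="2020-05-03",
    end_date="2020-05-06",
    site_urls=["http://nc-nashcounty.civicplus.com/AgendaCenter"],
    cache=True,
    download=True,
)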
def test_env_configured_default(monkeypatch):
    "CIVIC_SCRAPER_DIR env var should configure cache"
    monkeypatch.setenv("CIVIC_SCRAPER_DIR", "/tmp/civic-scraper")
    cache = Cache()
    assert cache.path == "/tmp/civic-scraper"
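# A sketch of the cache-path resolution the two tests above imply (not the
# library's actual source): CIVIC_SCRAPER_DIR wins when set, otherwise the
# default is ~/.civic-scraper.
import os
from os.path import expanduser, join

def default_cache_path():
    """Resolve the default cache directory the way the tests above expect."""
    return os.environ.get("CIVIC_SCRAPER_DIR", join(expanduser("~"), ".civic-scraper"))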