def parsed_items():
    """Parse the SSA 5 minutes fixture with the clock frozen at 2018-10-12 12:00:00.

    The items parsed from ``files/chi_ssa_5.html`` are attached to the minutes
    response's ``meta['items']`` before it is handed to ``_parse_minutes``.

    Returns:
        list: everything yielded by ``spider._parse_minutes``.
    """
    # The context manager restores the real clock even if parsing raises.
    with freeze_time('2018-10-12 12:00:00'):
        minutes_req = file_response('files/chi_ssa_5_minutes.html')
        minutes_req.meta['items'] = spider._parse_current_year(
            file_response('files/chi_ssa_5.html'))
        return list(spider._parse_minutes(minutes_req))
Esempio n. 2
0
def parsed_items():
    """Parse the SSA 42 fixtures with the clock frozen at 2018-11-07.

    Returns:
        list: items from ``files/chi_ssa_42.html`` (parsed with
        ``upcoming=True``) followed by items from
        ``files/chi_ssa_42_minutes.html``.
    """
    # The context manager restores the real clock even if parsing raises.
    with freeze_time('2018-11-07'):
        spider = ChiSsa42Spider()
        res = file_response('files/chi_ssa_42.html')
        minutes_res = file_response('files/chi_ssa_42_minutes.html')
        upcoming_items = list(spider._parse_items(res, upcoming=True))
        return upcoming_items + list(spider._parse_items(minutes_res))
def parsed_items():
    """Parse the housing trust fund calendar and detail fixtures at 2018-10-31.

    Every item yielded by ``_parse_calendar`` is attached to a detail response
    as ``meta['item']`` and passed through ``_parse_detail``.

    Returns:
        list: one ``_parse_detail`` result per calendar item.
    """
    # The context manager restores the real clock even if parsing raises.
    with freeze_time('2018-10-31'):
        spider = ChiLowIncomeHousingTrustFundSpider()
        cal_res = file_response('files/chi_low_income_housing_trust_fund.html')
        details = []
        for item in spider._parse_calendar(cal_res):
            # A new response per item, since its meta is mutated below.
            detail_res = file_response(
                'files/chi_low_income_housing_trust_fund_detail.html')
            detail_res.meta['item'] = item
            details.append(spider._parse_detail(detail_res))
        return details
def parsed_item():
    """Parse the city council event fixture with the clock frozen at 2018-01-01 12:00:01.

    Returns:
        The result of ``spider._parse_item`` for
        ``files/chi_citycouncil_event.json``.
    """
    # The context manager restores the real clock even if parsing raises.
    with freeze_time('2018-01-01 12:00:01'):
        item = file_response('files/chi_citycouncil_event.json', url=INITIAL_REQUEST)
        return spider._parse_item(item)
Esempio n. 5
0
def parsed_items():
    """Parse the SSA 17 fixture with the clock frozen at 2018-11-07.

    Returns:
        list: everything yielded by ``spider.parse`` for
        ``files/chi_ssa_17.html``.
    """
    # The context manager restores the real clock even if parsing raises.
    with freeze_time('2018-11-07'):
        spider = ChiSsa17Spider()
        res = file_response('files/chi_ssa_17.html')
        return list(spider.parse(res))
Esempio n. 6
0
def test_gen_requests():
    """The feed fixture produces one events-API URL per event, in feed order."""
    expected_urls = [
        'http://thehacc.org/wp-json/tribe/events/v1/events/2836',
        'http://thehacc.org/wp-json/tribe/events/v1/events/2816',
        'http://thehacc.org/wp-json/tribe/events/v1/events/2650',
        'http://thehacc.org/wp-json/tribe/events/v1/events/2882',
        'http://thehacc.org/wp-json/tribe/events/v1/events/2858',
        'http://thehacc.org/wp-json/tribe/events/v1/events/2879',
    ]
    feed_spider = Cook_housingAuthoritySpider()
    feed_response = file_response(
        'files/hacc_feed.txt', 'http://thehacc.org/events/feed/')
    generated = list(feed_spider._gen_requests(feed_response))
    assert generated == expected_urls
Esempio n. 7
0
def test_gen_html_filenames(monkeypatch):
    """``_gen_html`` returns one rendered file path per start URL.

    ``scripts.generate_spider._fetch_url`` is replaced by a mock that returns
    the three ``.example`` fixtures in order.
    """
    files_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'files')
    test_filenames = [
        files_dir + '/testspider_articles.html',
        files_dir + '/testspider_staff.html',
        files_dir + '/testspider_is-chicago-any-less-segregated.html',
    ]
    patched_fetch_url = Mock()
    patched_fetch_url.side_effect = [
        file_response('files/testspider_articles.html.example'),
        file_response('files/testspider_staff.html.example'),
        file_response(
            'files/testspider_is-chicago-any-less-segregated.html.example'),
    ]
    monkeypatch.setattr('scripts.generate_spider._fetch_url',
                        patched_fetch_url)
    rendered_filenames = generate_spider._gen_html(SPIDER_NAME,
                                                   SPIDER_START_URLS,
                                                   session=session)
    try:
        assert rendered_filenames == test_filenames
    finally:
        # Remove the generated files even when the assertion fails, so a
        # failing run does not leave them behind.
        for f in rendered_filenames:
            os.remove(f)
Esempio n. 8
0
def test_gen_html_content(monkeypatch):
    """The rendered articles page keeps the ``<title>`` of the example fixture.

    ``scripts.generate_spider._fetch_url`` is replaced by a mock that always
    returns the articles ``.example`` fixture.
    """
    patched_fetch_url = Mock()
    patched_fetch_url.return_value = file_response(
        'files/testspider_articles.html.example')
    monkeypatch.setattr('scripts.generate_spider._fetch_url',
                        patched_fetch_url)
    rendered_filenames = generate_spider._gen_html(SPIDER_NAME,
                                                   [SPIDER_START_URLS[0]],
                                                   session=session)
    try:
        test_file_content = read_test_file_content(
            'files/testspider_articles.html.example')
        rendered_content = read_test_file_content(
            'files/testspider_articles.html')
        test_title = fromstring(test_file_content).xpath('//title')[0].text
        rendered_title = fromstring(rendered_content).xpath('//title')[0].text
        assert test_title == rendered_title
    finally:
        # Remove the generated files even when reading or the assertion
        # fails, so a failing run does not leave them behind.
        for f in rendered_filenames:
            os.remove(f)
Esempio n. 9
0
from datetime import date, time

import pytest
from tests.utils import file_response

from city_scrapers.constants import CANCELED
from city_scrapers.spiders.det_great_lakes_water_authority import DetGreatLakesWaterAuthoritySpider

# Events landing page fixture and the requests the spider yields for it.
test_response = file_response(
    'files/det_great_lakes_water_authority.html',
    'http://www.glwater.org/events/',
)
spider = DetGreatLakesWaterAuthoritySpider()
requests = list(spider.parse(test_response))

# iCal fixture for the month view.
test_ics_response = file_response(
    'files/det_great_lakes_water_authority.ics',
    'http://www.glwater.org/events/?ical=1&tribe_display=month',
)
# Ordered by (start date, start time) so tests can index items predictably.
parsed_items = sorted(
    spider._parse_ical(test_ics_response),
    key=lambda event: (event['start']['date'], event['start']['time']),
)


def test_requests():
    """Parsing the landing page yields exactly the two expected request URLs."""
    expected_urls = {
        'http://www.glwater.org/events/?ical=1&tribe_display=month',
        'http://www.glwater.org/events/2018-08/',
    }
    yielded = list(spider.parse(test_response))
    # spider should yield ical for month + request for next month calendar
    assert len(yielded) == 2
    assert {req.url for req in yielded} == expected_urls
Esempio n. 10
0
from datetime import date, time

import pytest
from freezegun import freeze_time
from tests.utils import file_response

from city_scrapers.spiders.wayne_economic_development import WayneEconomicDevelopmentSpider

# Build the fixtures with the clock frozen at 2018-03-27 12:00:01. Entering
# the freezer as a context manager stops it even if parsing raises, so a
# failure here cannot leave time frozen for the rest of the process.
freezer = freeze_time('2018-03-27 12:00:01')
with freezer:
    test_response = file_response(
        'files/wayne_economic-development.html',
        url=
        'https://www.waynecounty.com/elected/commission/economic-development.aspx')
    spider = WayneEconomicDevelopmentSpider()
    # Keep only the dict results yielded by the spider.
    parsed_items = [
        item for item in spider.parse(test_response) if isinstance(item, dict)
    ]

# PARAMETRIZED TESTS


@pytest.mark.parametrize('item', parsed_items)
def test_event_description(item):
    """Every parsed item has an empty event description."""
    description = item['event_description']
    assert description == ''


@pytest.mark.parametrize('item', parsed_items)
def test_location(item):
    expected_location = ({
# -*- coding: utf-8 -*-

import pytest

from tests.utils import file_response
from documenters_aggregator.spiders.il_pubhealth import Il_pubhealthSpider

# Fixture page, spider under test, and the dict results it yields.
test_response = file_response('files/il_pubhealth.html')
spider = Il_pubhealthSpider()
parsed_items = [
    result for result in spider.parse(test_response)
    if isinstance(result, dict)
]


def test_name():
    """The third parsed item carries the expected meeting name."""
    expected = 'PAC: Maternal Mortality Review Committee Meeting'
    assert parsed_items[2]['name'] == expected


def test_description():
    """The second parsed item carries the full multi-line description."""
    expected = """CONFERENCE ROOMS
122 S. Michigan, 7th Floor, Chicago, Room 711
535 West Jefferson St., 5th Floor, Springfield
Conference Call Information
Conference Call-In#: 888.494.4032
Access Code: 6819028741
Interested persons may contact the Office of Women’s Health at 312-814-4035 for information"""
    assert parsed_items[1]['description'] == expected


def test_start_time():
    """The fifth parsed item starts at the expected ISO timestamp."""
    start_time = parsed_items[4]['start_time']
    assert start_time == '2017-08-09T11:00:00-05:00'


def test_end_time():
import pytest

# Shared properties between two different page / meeting types
import scrapy
from freezegun import freeze_time

from city_scrapers.spiders.det_brownfield_redevelopment_authority import DetBrownfieldRedevelopmentAuthoritySpider
from tests.utils import file_response

# Constants referenced by the tests in this module.
LOCATION = {
    'neighborhood': '',
    'name': 'DEGC, Guardian Building',
    'address': '500 Griswold, Suite 2200, Detroit',
}

DBRA = 'Board of Directors'
DBRA_CAC = 'Community Advisory Committee'

test_response = file_response('files/det_brownfield_redevelopment_authority.html',
                              'http://www.degc.org/public-authorities/dbra/')
freezer = freeze_time('2018-07-28 12:00:01')
spider = DetBrownfieldRedevelopmentAuthoritySpider()
# Entering the freezer as a context manager stops it even if parsing raises,
# so a failure here cannot leave time frozen for the rest of the process.
with freezer:
    parsed_items = [
        item for item in spider._next_meeting(test_response)
        if isinstance(item, dict)
    ]


def test_initial_request_count():
    items = list(spider.parse(test_response))
    assert len(items) == 5
    urls = {r.url for r in items if isinstance(r, scrapy.Request)}
    assert urls == {
        'http://www.degc.org/public-authorities/dbra/fy-2018-2019-notices-agendas-and-minutes/',
        'http://www.degc.org/public-authorities/dbra/fy-2017-2018-meetings/',
        'http://www.degc.org/public-authorities/dbra/dbra-fy-2016-2017-meetings/'
Esempio n. 13
0
import pytest

from tests.utils import file_response
from documenters_aggregator.spiders.chi_pubhealth import Chi_pubhealthSpider

# Fixture page for the 2017 Board of Health schedule.
test_response = file_response(
    'files/chi_pubhealth.html',
    url='https://www.cityofchicago.org/city/en/depts/cdph/supp_info/boh/2017-board-of-health.html',
)
spider = Chi_pubhealthSpider()
# Keep only the dict results yielded by the spider.
parsed_items = [
    result for result in spider.parse(test_response)
    if isinstance(result, dict)
]


def test_name():
    """The first parsed item is named 'Board of Health Meeting'."""
    first = parsed_items[0]
    assert first['name'] == 'Board of Health Meeting'


def test_description():
    """The first parsed item carries the standing meeting description."""
    expected = 'The Chicago Board of Health is scheduled to meet on the third Wednesday of each month from 9:00am-10:30am. The meetings are held at the Chicago Department of Public Health, DePaul Center, 333 S. State Street, 2nd Floor Board Room.'
    assert parsed_items[0]['description'] == expected


def test_start_time():
    """The first parsed item starts at the expected ISO timestamp."""
    start_time = parsed_items[0]['start_time']
    assert start_time == '2017-01-18T09:00:00-06:00'


def test_end_time():
    """The first parsed item ends at the expected ISO timestamp."""
    end_time = parsed_items[0]['end_time']
    assert end_time == '2017-01-18T10:30:00-06:00'
Esempio n. 14
0
from datetime import date, time

import pytest
from freezegun import freeze_time
from tests.utils import file_response

from city_scrapers.spiders.det_charter_school_boards import DetCharterSchoolBoardsSpider

test_response = file_response(
    'files/det_charter_school_boards.html',
    'http://detroitk12.org/admin/charter_schools/boards/')

# Build the spider and parse with the clock frozen at 2018-08-15 12:00:01.
# Entering the freezer as a context manager stops it even if parsing raises,
# so a failure here cannot leave time frozen for the rest of the process.
freezer = freeze_time('2018-08-15 12:00:01')
with freezer:
    spider = DetCharterSchoolBoardsSpider()
    # Keep only the dict results yielded by the spider.
    parsed_items = [
        item for item in spider.parse(test_response) if isinstance(item, dict)
    ]


def test_event_count():
    """The parsed item count equals non-calendar plus calendar events."""
    non_calendar_events, calendar_events = 8, 81
    expected_total = non_calendar_events + calendar_events
    assert len(parsed_items) == expected_total


def test_starts_with_day_of_week():
    # page is just on big block of text with meeting details nested in
    # p tags. Can id meetings by finding p tag text that starts with DoW
    assert spider._startswith_day_of_week() == \
from datetime import date, time

import pytest

from tests.utils import file_response
from city_scrapers.spiders.chi_landmark_commission import ChiLandmarkCommissionSpider

# Fixture page for the landmarks commission listing.
test_response = file_response(
    'files/chi_landmark_commission_landmarks_commission.html',
    'https://www.cityofchicago.org/city/en/depts/dcd/supp_info/landmarks_commission.html',
)
spider = ChiLandmarkCommissionSpider()
# Keep only the dict results yielded by the spider.
parsed_items = [
    result for result in spider.parse(test_response)
    if isinstance(result, dict)
]


def test_meeting_count():
    """The fixture yields 123 meetings."""
    # 12 mtgs / yr * 10 yrs + 3 extra (May 2018, March 2014, June 2013)
    expected_count = 123
    assert len(parsed_items) == expected_count


def test_unique_id_count():
    """All 123 parsed items have distinct ids."""
    unique_ids = {item['id'] for item in parsed_items}
    assert len(unique_ids) == 123


def test__type():
    """The first parsed item has ``_type`` set to 'event'."""
    first = parsed_items[0]
    assert first['_type'] == 'event'


def test_name():
Esempio n. 16
0
# -*- coding: utf-8 -*-
import pytest

from tests.utils import file_response
from city_scrapers.spiders.chi_schools import Chi_schoolsSpider

# Fixture page for the planning calendar.
test_response = file_response(
    'files/cpsboe.html',
    url='http://www.cpsboe.org/meetings/planning-calendar',
)
spider = Chi_schoolsSpider()
# Keep only the dict results yielded by the spider.
parsed_items = [
    result for result in spider.parse(test_response)
    if isinstance(result, dict)
]


def test_event_count():
    """The fixture yields 14 events."""
    expected_count = 14
    assert len(parsed_items) == expected_count


@pytest.mark.parametrize('item', parsed_items)
def test_type(item):
    """Every parsed item has ``_type`` set to 'event'."""
    item_type = item['_type']
    assert item_type == 'event'


# def test_id():
#    assert parsed_items[0]['id'] == 'chi_schools/201707261030/x/monthly_board_meeting'


@pytest.mark.parametrize('item', parsed_items)
def test_name(item):
    """Every parsed item is named 'Monthly Board Meeting'."""
    name = item['name']
    assert name == 'Monthly Board Meeting'


@pytest.mark.parametrize('item', parsed_items)
def test_timezone(item):
Esempio n. 17
0
from datetime import date, time

import pytest

from tests.utils import file_response
from freezegun import freeze_time
from city_scrapers.constants import PASSED, TENTATIVE, COMMISSION
from city_scrapers.spiders.chi_ssa_14 import ChiSsa14Spider

test_response = file_response('files/chi_ssa_14.html')
spider = ChiSsa14Spider()

# Parse with the clock frozen at 2018-10-12 12:00:00. Entering the freezer as
# a context manager stops it even if parsing raises, so a failure here cannot
# leave time frozen for the rest of the process.
freezer = freeze_time('2018-10-12 12:00:00')
with freezer:
    # Keep only the dict results yielded by the spider.
    parsed_items = [
        item for item in spider.parse(test_response) if isinstance(item, dict)
    ]


def test_start():
    """The first parsed item starts on 2018-04-25 at 19:00 with no note."""
    expected_start = {
        'date': date(2018, 4, 25),
        'time': time(19, 0),
        'note': '',
    }
    assert parsed_items[0]['start'] == expected_start


def test_id():
from datetime import date, time

import pytest
from freezegun import freeze_time
from tests.utils import file_response

from city_scrapers.constants import COMMISSION, TENTATIVE
from city_scrapers.spiders.det_city_planning import DetCityPlanningSpider

# Build the fixtures with the clock frozen at 2018-12-27. Entering the freezer
# as a context manager stops it even if parsing raises, so a failure here
# cannot leave time frozen for the rest of the process.
freezer = freeze_time('2018-12-27')
with freezer:
    test_response = file_response(
        'files/det_city_planning.html',
        url=
        'https://www.detroitmi.gov/Government/Boards/City-Planning-Commission-Meetings'
    )
    spider = DetCityPlanningSpider()
    # Keep only the dict results yielded by the spider.
    parsed_items = [
        item for item in spider.parse(test_response) if isinstance(item, dict)
    ]
    # Order by start date so tests can index items predictably.
    parsed_items = sorted(parsed_items, key=lambda x: x['start']['date'])


def test_name():
    """The first parsed item is named 'City Planning Commission'."""
    first = parsed_items[0]
    assert first['name'] == 'City Planning Commission'


def test_description():
import pytest
from datetime import date, time
from tests.utils import file_response
from city_scrapers.constants import COMMITTEE, CONFIRMED
from city_scrapers.spiders.cook_landbank import CookLandbankSpider

# JSON fixture; ``test_response`` is an alias of the same response object.
file = file_response('files/cook_landbank.json')
test_response = file

spider = CookLandbankSpider()
# Materialise everything the spider yields for the fixture.
parsed_items = [*spider.parse(test_response)]


def test_name():
    """The first parsed item is named 'CCLBA Land Transactions Committee'."""
    first = parsed_items[0]
    assert first['name'] == 'CCLBA Land Transactions Committee'


def test_event_description():
    """The first parsed item carries the expected event description."""
    expected = (
        'The Land Transactions Committee will convene on Friday, September '
        '14th at the hour of 10:00 AM at the location of 69 W. Washington '
        'St., 22nd Floor, Conference Room ‘B”, Chicago, Illinois, to consider '
        'the following:'
    )
    description = parsed_items[0]['event_description']
    assert description == expected


def test_start():
    EXPECTED_START = {
        'date': date(2018, 9, 14),
        'time': time(10, 00),
        'note': ''
import pytest
import scrapy

from datetime import datetime
from tests.utils import file_response
from documenters_aggregator.spiders.chi_buildings import Chi_buildingsSpider

# JSON listing fixture and HTML event-page fixture.
test_json_response = file_response('files/chi_buildings.json')
test_event_response = file_response('files/chi_buildings.html')

spider = Chi_buildingsSpider()
# Setting spider date to time test files were generated
spider.calendar_date = datetime(year=2018, month=2, day=18)


class MockRequest(object):
    """Stand-in request object exposing ``meta`` and item lookup by key."""

    # Class-level default; instances may assign their own ``meta`` dict.
    meta = {}

    def __getitem__(self, key):
        """Return ``key`` from the dict stored at ``meta['item']`` (None if absent)."""
        item = self.meta['item']
        return item.get(key)


def mock_request(*args, **kwargs):
    """Return a new ``MockRequest`` whose ``meta`` holds an empty ``item`` dict.

    All positional and keyword arguments are accepted and ignored.
    """
    stub = MockRequest()
    stub.meta = {'item': {}}
    return stub


# Replace scrapy.Request with the mock factory before the spider is exercised.
# NOTE(review): this patch is applied at import time and is not undone in the
# visible code — confirm it cannot leak into other test modules.
scrapy.Request = mock_request
parsed_items = list(spider.parse(test_json_response))
parsed_event = spider._parse_event(test_event_response)
Esempio n. 21
0
import pytest
from datetime import date, time

from tests.utils import file_response
from city_scrapers.spiders.chi_ward_night import ChiWardNightSpider, Calendar

# JSON fixture parsed by a spider constructed with a fixed start date.
test_response = file_response('files/test_chi_ward_night.json')
spider = ChiWardNightSpider(start_date=date(2017, 11, 1))
# Keep only the dict results yielded by the spider.
parsed_items = [
    result for result in spider.parse(test_response)
    if isinstance(result, dict)
]


def test_id():
    """The first parsed item has the expected id."""
    expected_id = 'chi_ward_night/201711071600/x/ward_night_ward_1'
    assert parsed_items[0]['id'] == expected_id


def test_name():
    """The first parsed item is named 'Ward Night: Ward 1'."""
    first = parsed_items[0]
    assert first['name'] == 'Ward Night: Ward 1'


def test_event_description():
    assert parsed_items[0]['event_description'] == (
        'Ward Night with Ald. Joe Moreno (Ward 1)\n\n'
        'Frequency: Weekly\n'
        'Day of the Week: Tuesday\n'
        'Requires Sign-Up: No\n'
        'Phone: 773.278.0101\n'
        'Email: [email protected]\n'
Esempio n. 22
0
from tests.utils import file_response
from city_scrapers.spiders.cook_county import Cook_countySpider

# Single event-page fixture and the result of parsing it.
test_response = file_response(
    'files/cook_county_event.html',
    url='https://www.cookcountyil.gov/event/cook-county-zoning-building-committee-6',
)
spider = Cook_countySpider()
item = spider._parse_event(test_response)


def test_name():
    """The parsed event is named 'ZBA Public Hearing'."""
    name = item['name']
    assert name == 'ZBA Public Hearing'


def test_start_time():
    """The parsed event's start time has the expected ISO representation."""
    start_time = item['start_time']
    assert start_time.isoformat() == '2017-11-15T13:00:00-06:00'


def test_end_time():
    """The parsed event's end time has the expected ISO representation."""
    end_time = item['end_time']
    assert end_time.isoformat() == '2017-11-15T15:00:00-06:00'


# def test_id():
#    assert item['id'] == 'cook_county/201711151300/x/zba_public_hearing'


def test_all_day():
    """The parsed event is not flagged as all-day."""
    all_day = item['all_day']
    assert all_day is False
Esempio n. 23
0
# -*- coding: utf-8 -*-

import pytest

from tests.utils import file_response
from documenters_aggregator.spiders.cook_hospitals import Cook_hospitalsSpider

# Fixture page for the board and committee meetings listing.
test_response = file_response(
    'files/cook_hospitals.html',
    url='http://www.cookcountyhhs.org/about-cchhs/governance/board-committee-meetings/',
)
spider = Cook_hospitalsSpider()
# Keep only the dict results yielded by the spider.
parsed_items = [
    result for result in spider.parse(test_response)
    if isinstance(result, dict)
]


def test_name():
    """The first parsed item is named 'Meetings of the Board of Directors'."""
    first = parsed_items[0]
    assert first['name'] == 'Meetings of the Board of Directors'


def test_description():
    """The first parsed item's description is the agenda PDF URL."""
    expected = 'http://www.cookcountyhhs.org/wp-content/uploads/2016/01/01-27-17-Board-Agenda.pdf'
    assert parsed_items[0]['description'] == expected


def test_start_time():
    """The first parsed item's start time has the expected ISO representation."""
    start_time = parsed_items[0]['start_time']
    assert start_time.isoformat() == '2017-01-27T09:00:00-06:00'
Esempio n. 24
0
import calendar
from datetime import datetime
from pytz import timezone

from tests.utils import file_response
from documenters_aggregator.spiders.cook_pubhealth import Cook_pubhealthSpider

# Single event-page fixture and the result of parsing it.
test_response = file_response(
    'files/cook_pubhealth_321.html',
    'http://www.cookcountypublichealth.org/events-view/321',
)
spider = Cook_pubhealthSpider()
item = spider.parse_event_page(test_response)


def test_name():
    """The parsed event is named after the Robbins Health Center market."""
    name = item['name']
    assert name == 'Fresh Food Market: Robbins Health Center'


def test_description():
    expected_description = (
        'This summer, Fresh Food Markets will be hosted weekly, '
        '10:00 a.m. - 2:00 p.m. at three Cook County Health and Hospital System health '
        'centers in South Suburban Cook County.Robbins Health Center, 13450 W. Kedzie., Robbins.'
        'Black Oaks Center, a local nonprofit, is partnering with CCHHS to make fresh fruits and '
        'vegetables available for sale at CCHHS health centers. Cash, credit, and Link cards '
        '(SNAP/EBT/food stamps) are accepted as forms of payment. Persons using their SNAP/Link '
        'card benefits to purchase will receive a Link Match Coupon, as part of the Link Up '
        'Illinois program, good towards the next purchase of fruits and/or vegetables.If you '
        'have Medicaid or receive a medical card, you may be eligible for SNAP. Our partners '
        'at the Greater Chicago Food Depository can assist with SNAP applications. Visit their'
        'websiteor call 773-843-5416 to reach their Benefits Outreach team.')
Esempio n. 25
0
import pytest

from tests.utils import file_response
from city_scrapers.spiders.metra_board import Metra_boardSpider

# Fixture page, spider under test, and the dict results it yields.
test_response = file_response('files/metra_board.html')
spider = Metra_boardSpider()
parsed_items = [
    result for result in spider.parse(test_response)
    if isinstance(result, dict)
]


def test_name():
    """The first parsed item is named 'Metra February 2018 Board Meeting'."""
    first = parsed_items[0]
    assert first['name'] == 'Metra February 2018 Board Meeting'


def test_start_time():
    """The first parsed item's start time has the expected ISO representation."""
    start_time = parsed_items[0]['start_time']
    assert start_time.isoformat() == '2018-02-21T10:30:00-06:00'


# def test_id():
#    assert parsed_items[0]['id'] == 'metra_board/201802211030/x/metra_february_2018_board_meeting'


def test_location():
    assert parsed_items[0]['location'] == {
        'url': '',
        'name': '',
        'address': '547 West Jackson Boulevard, Chicago, IL',
        'coordinates': {
from tests.utils import file_response
from documenters_aggregator.spiders.chi_city_college import Chi_cityCollegeSpider


# Single event-page fixture and the result of parsing it.
test_response = file_response(
    'files/ccc_event.html',
)
spider = Chi_cityCollegeSpider()
item = spider.parse_event_page(test_response)


def test_name():
    """The parsed event is named 'November 2017 Regular Board Meeting'."""
    name = item['name']
    assert name == 'November 2017 Regular Board Meeting'


def test_start_time():
    """The parsed event's start time has the expected ISO representation."""
    start_time = item['start_time']
    assert start_time.isoformat() == '2017-11-02T09:00:00-05:00'


def test_end_time():
    """The parsed event has no end time."""
    end_time = item['end_time']
    assert end_time is None


def test_id():
    """The parsed event has the expected id."""
    expected_id = 'chi_city_college/201711020900/x/november_2017_regular_board_meeting'
    assert item['id'] == expected_id


def test_all_day():
    """The parsed event is not flagged as all-day."""
    all_day = item['all_day']
    assert all_day is False


def test_classification():
    """The parsed event's classification is 'Not classified'."""
    classification = item['classification']
    assert classification == 'Not classified'
from datetime import datetime

import pytest
from city_scrapers_core.constants import COMMISSION, PASSED
from tests.utils import file_response

from city_scrapers.spiders.chi_zoning_board import ChiZoningBoardSpider

# Fixture page for the zoning board of appeals listing.
test_response = file_response(
    'files/chi_zoning_board.html',
    'https://www.chicago.gov/city/en/depts/dcd/supp_info/zoning_board_of_appeals.html',
)
spider = ChiZoningBoardSpider()
# Materialise everything the spider yields for the fixture.
parsed_items = list(spider.parse(test_response))


def test_meeting_count():
    """The fixture yields 122 meetings."""
    # 12 mtgs / yr * 10 yrs + 2 extra (May 2015, and August 2014)
    expected_count = 122
    assert len(parsed_items) == expected_count


def test_unique_id_count():
    """All 122 parsed items have distinct ids."""
    unique_ids = {item['id'] for item in parsed_items}
    assert len(unique_ids) == 122


def test_title():
    """The first parsed item is titled 'Board of Appeals'."""
    first = parsed_items[0]
    assert first['title'] == 'Board of Appeals'


def test_description():
    """The first parsed item has an empty description."""
    description = parsed_items[0]['description']
    assert description == ''
from datetime import datetime

import pytest
from city_scrapers_core.constants import COMMISSION, PASSED
from freezegun import freeze_time
from tests.utils import file_response

from city_scrapers.spiders.chi_board_elections import ChiBoardElectionsSpider

# Freeze the clock at 2018-11-30 12:00:01 before building the fixtures.
# NOTE(review): no matching freezer.stop() is visible in this part of the
# file — confirm the freezer is stopped further down.
freezer = freeze_time('2018-11-30 12:00:01')
freezer.start()
test_response = file_response(
    'files/chi_board_elections.html',
    url='https://app.chicagoelections.com/pages/en/board-meetings.aspx',
)
spider = ChiBoardElectionsSpider()
parsed_items = list(spider._next_meeting(test_response))


def test_title():
    """The first parsed item is titled 'Electoral Board'."""
    first = parsed_items[0]
    assert first['title'] == 'Electoral Board'


def test_description():
    """The first parsed item has an empty description."""
    description = parsed_items[0]['description']
    assert description == ''


def test_start():
    """The first parsed item starts on 2018-11-27 at 09:30."""
    expected_start = datetime(2018, 11, 27, 9, 30)
    assert parsed_items[0]['start'] == expected_start


def test_end():
Esempio n. 29
0
import pytest
from tests.utils import file_response
from documenters_aggregator.spiders.chi_library import Chi_librarySpider

# def test_tests():
#     print('Please write some tests for this spider or at least disable this one.')
#     assert False

# Fixture page for the library board meeting schedule.
test_response = file_response(
    'files/chi_library.html',
    url='https://www.chipublib.org/board-of-directors/board-meeting-schedule/',
)
spider = Chi_librarySpider()
# Keep only the dict results yielded by the spider.
parsed_items = [
    result for result in spider.parse(test_response)
    if isinstance(result, dict)
]


def test_name():
    """The first parsed item is named 'Chicago Public Library Board Meeting'."""
    first = parsed_items[0]
    assert first['name'] == 'Chicago Public Library Board Meeting'


def test_description():
    """The first parsed item carries the schedule note as its description."""
    expected = 'There are no meetings in February, July and August. Entry into these meetings is permitted at 8:45 a.m.'
    assert parsed_items[0]['description'] == expected


def test_start_time():
    """The first parsed item starts at the expected ISO timestamp."""
    start_time = parsed_items[0]['start_time']
    assert start_time == '2017-01-17T09:00:00-06:00'


def test_end_time():
Esempio n. 30
0
from datetime import date, time

import pytest
# Adapted from test_chi_parks.py
from freezegun import freeze_time

from tests.utils import file_response
from city_scrapers.spiders.wayne_public_services import WaynePublicServicesSpider

# Build the fixtures with the clock frozen at 2018-03-27 12:00:01. Entering
# the freezer as a context manager stops it even if parsing raises, so a
# failure here cannot leave time frozen for the rest of the process.
freezer = freeze_time('2018-03-27 12:00:01')
with freezer:
    test_response = file_response(
        'files/wayne_public_services.html',
        url='https://www.waynecounty.com/elected/commission/public-services.aspx')
    spider = WaynePublicServicesSpider()
    # Keep only the dict results yielded by the spider.
    parsed_items = [
        item for item in spider.parse(test_response) if isinstance(item, dict)
    ]

# PARAMETRIZED TESTS


@pytest.mark.parametrize('item', parsed_items)
def test_event_description(item):
    """Every parsed item has an empty event description."""
    description = item['event_description']
    assert description == ''


@pytest.mark.parametrize('item', parsed_items)
def test_location(item):
    expected_location = ({