コード例 #1
0
def get_establishments(city):
    """Scrape the paginated establishment listing for *city*.

    Pages through the city's establishment search results (``count`` rows
    per request) until the site reports 'No documents found', collecting
    one dict per establishment data row.

    :param city: dict with at least 'establishmentUrl', 'locality' and
        'baseUrl' keys (schema assumed from usage -- confirm with caller).
    :return: list of establishment dicts ready for insertion.
    """
    establishments_found = []

    # Pagination window: the site is queried ``count`` rows at a time.
    start = 1
    count = 1000
    more = True

    # Raw strings avoid invalid-escape-sequence warnings (`\(`, `\s`, `\n`
    # are not valid string escapes); compiling once hoists the pattern
    # lookup out of the per-row loop.
    id_pattern = re.compile(r'(#|\()(\s)*([0-9][0-9]-[0-9][0-9][0-9][0-9])(\))?')
    newline_pattern = re.compile(r'(\n)+')

    while more:
        establishment_list = scrapertools.get_content(BASE_URL + city['establishmentUrl']+'&start='+str(start)+'&count='+str(count))
        # Sentinel text marks the page past the last result.
        if establishment_list.find(text='No documents found') is not None:
            more = False
            continue
        start += count
        establishments = establishment_list.find_all('tr')
        for establishment in establishments:
            details = establishment.find_all('td')
            # Data rows have exactly 4 cells with a link in the first cell;
            # anything else (headers, separators) is skipped.
            if len(details) == 4 and details[0] is not None and details[0].a is not None:
                date = (None if scrapertools.get_text(details[3]) is None
                        else datetime.strptime(scrapertools.get_text(details[3]), '%d-%b-%Y'))
                # Removes establishment IDs (e.g. '#12-3456' / '(12-3456)')
                # and newlines from the establishment name.
                name = id_pattern.sub('', scrapertools.get_text(details[0]))
                name = newline_pattern.sub(' ', name)
                address = scrapertools.get_text(details[2])
                slug_id = slugify(name.strip() + ' ' + address)
                establishments_found.append({
                    'slug': slug_id,
                    'name': name.strip(),
                    'url': details[0].a['href'],
                    'address': address,
                    'locality': city['locality'],
                    'last_inspection_date': date,
                    'baseUrl': city['baseUrl'],
                    'inserted': datetime.now()
                })
    return establishments_found
コード例 #2
0
def get_establishments(city):
    """Collect every establishment listed for *city*.

    Pages through the site's search results a fixed-size window at a time,
    stopping when the site reports 'No documents found'.
    """
    found = []
    offset = 1
    page_size = 1000

    while True:
        page = scrapertools.get_content(BASE_URL + city['establishmentUrl']+'&start='+str(offset)+'&count='+str(page_size))
        # The sentinel text appears once we have paged past the last result.
        if page.find(text='No documents found') is not None:
            break
        offset += page_size
        for row in page.find_all('tr'):
            cells = row.find_all('td')
            # Only data rows qualify: exactly four cells, a link in the first.
            if len(cells) != 4 or cells[0] is None or cells[0].a is None:
                continue
            raw_date = scrapertools.get_text(cells[3])
            found.append({
                'name': scrapertools.get_text(cells[0]),
                'url': cells[0].a['href'],
                'address': scrapertools.get_text(cells[2]),
                'locality': city['locality'],
                'last_inspection_date': (None if raw_date is None
                                         else datetime.strptime(raw_date, '%d-%b-%Y'))
            })

    return found
コード例 #3
0
def get_inspections(establishment, city_url):
    """Scrape all inspections (and their violations) for one establishment.

    :param establishment: dict with a 'url' key pointing at the
        establishment's detail page, relative to ``BASE_URL``.
    :param city_url: path segment used to build absolute violation URLs.
    :return: list of dicts with 'type', 'date' and 'violations' keys.
    """
    inspections_found = []

    establishment_details = scrapertools.get_content(BASE_URL + establishment['url'])
    # Inspection rows are the <tr> elements that follow the row containing
    # the 'Inspection Type' header text.
    inspections = establishment_details.find_all(text='Inspection Type')[0].find_parent('tr').find_all_next('tr')

    for inspection in inspections:
        details = inspection.find_all('td')

        # Guard against rows without enough <td> cells (e.g. header rows
        # using <th>): the original indexed details[0]/details[1] directly,
        # which raises IndexError on such rows. Also skip rows whose first
        # cell carries no link to a violation page.
        if len(details) < 2 or details[0].a is None:
            continue

        violations = get_violations(BASE_URL + city_url + '/' + details[0].a['href'])
        inspections_found.append({
            'type': scrapertools.get_text(details[0]),
            'date': datetime.strptime(scrapertools.get_text(details[1]), '%d-%b-%Y'),
            'violations': violations
        })

    return inspections_found