Esempio n. 1
0
    def clean_list_record(self, record):
        """Normalize one raw list record in place and return it.

        The three date fields go through ``parse_date``, the address fields
        are stripped, ``NARRATIVE`` is replaced by ``TIME`` and
        ``DISPOSITION``, ``location_name`` is derived from ``ADDRESS_TYPE``
        and the broad/detail categories are looked up in
        ``CUSTOM_CATEGORIES``.

        Raises AttributeError when ``NARRATIVE`` does not match the expected
        "Time: ...<br>...<br>Disposition: ..." layout, because the failed
        search yields None.
        """
        for date_field in ('CALL_DATE', 'OFFENSE_DATE', 'REPORT_DATE'):
            record[date_field] = parse_date(record[date_field], '%Y-%m-%d')
        for text_field in ('COMMON_LOCATION', 'ADDRESS_NBR', 'ADDRESS'):
            record[text_field] = record[text_field].strip()

        # NARRATIVE bundles the time and the disposition. It is removed from
        # the record and replaced by the two named groups.
        narrative = record.pop('NARRATIVE')
        parsed = re.search(
            r'^Time: (?P<TIME>\d\d?:\d\d)<br>.*?<br>Disposition: (?P<DISPOSITION>.*)$',
            narrative)
        record.update(parsed.groupdict())
        record['TIME'] = parse_time(record['TIME'], '%H:%M')

        # Build location_name; each ADDRESS_TYPE has its own layout and
        # anything unrecognized (or without usable data) stays 'Unknown'.
        kind = record['ADDRESS_TYPE']
        number = record['ADDRESS_NBR']
        street = record['ADDRESS']
        landmark = record['COMMON_LOCATION']
        location = 'Unknown'
        if kind == 'PREMISE ADDRESS':
            location = '%s block of %s' % (number, clean_address(street))
        elif kind == 'INTERSECTION':
            if '/' in street:
                # Only the first two streets of the intersection are used.
                crossing = street.split('/')
                location = '%s and %s' % (
                    clean_address(crossing[0]), clean_address(crossing[1]))
            else:
                location = clean_address(street)
        elif kind == 'GEO-OVERRIDE':
            location = clean_address(street)
        elif kind == 'COMMON LOCATION':
            if street:
                if number:
                    location = '%s %s' % (number, clean_address(street))
                elif landmark:
                    location = '%s (%s)' % (
                        clean_address(street), clean_address(landmark))
                else:
                    location = clean_address(street)
            elif landmark:
                location = clean_address(landmark)
        record['location_name'] = location

        # Crime types without a custom mapping fall back to 'Unknown'.
        try:
            categories = CUSTOM_CATEGORIES[record['ORIG_CRIMETYPE_NAME']]
        except KeyError:
            categories = ('Unknown', 'Unknown')
        record['broad_category'], record['detail_category'] = categories

        return record
Esempio n. 2
0
    def clean_list_record(self, record):
        """Normalize a raw foreclosure record in place and return it.

        Raises SkipRecord (after logging) when ``filing_dat`` cannot be turned
        into a date, i.e. ``parse_date`` raises ValueError or returns None.
        """
        strip_dict(record)

        try:
            filing_date = parse_date(str(int(record['filing_dat'])), '%m%d%y')
        except ValueError:
            filing_date = None
        record['filing_date'] = filing_date
        if filing_date is None:
            self.logger.info('Skipping invalid filing date %r', record['filing_dat'])
            raise SkipRecord

        record['address'] = clean_address(record.pop('address'))

        # Rename the awkward source columns to friendlier keys. The pin_number
        # entry keeps its name but still requires the key to be present.
        renames = (
            ('case_#', 'case_number'),
            ('document_#', 'document_number'),
            ('pin_number', 'pin_number'),
        )
        for source_key, target_key in renames:
            record[target_key] = record.pop(source_key)

        # Values without a ``year`` attribute are reported as 'Unknown'.
        mortgage_date = record.pop('year_of_mo')
        try:
            record['year_of_mortgage'] = str(mortgage_date.year)
        except AttributeError:
            record['year_of_mortgage'] = 'Unknown'

        # Normalize inconsistent headers
        for variant, canonical in (('SF', 'sf'), ('SMF', 'smf'), ('Condo', 'condo')):
            if variant in record:
                record[canonical] = record.pop(variant)

        # The first non-zero flag, checked in this order, decides the type.
        type_flags = (
            ('sf', 'Single family'),
            ('smf', 'Multi-unit'),
            ('condo', 'Condo'),
        )
        for flag_key, label in type_flags:
            if int(record[flag_key]):
                record['property_type'] = label
                break
        else:
            record['property_type'] = 'Unknown'

        return record
Esempio n. 3
0
    def clean_list_record(self, record):
        """Normalize a raw inspection record in place and return it.

        Raises SkipRecord when the record's city is not in
        ``self.city_names``.
        """
        # Collapse the 58 per-violation columns (vio1..vio58), most of which
        # are zero, into a list of (violation number, count) pairs.
        counts = [(str(number), int(record.pop('vio%s' % number)))
                  for number in range(1, 59)]
        record['violations'] = [pair for pair in counts if pair[1]]

        record['inspection_date'] = parse_date(record['inspection_date'],
                                               '%m/%d/%Y')
        record['address'] = clean_address(record['address'])

        city = record['city']
        if city not in self.city_names:
            raise SkipRecord('Skipping city %s' % city)
        record['city'] = city.title()

        # The remaining counters and identifiers are converted to integers.
        integer_fields = (
            'visit_number',
            'critical_violations',
            'noncritical_violations',
            'total_violations',
            'inspection_number',
        )
        for field in integer_fields:
            record[field] = int(record[field])

        return record
Esempio n. 4
0
    def save(self, old_record, list_record, detail_record):
        """Create a news item for each inspection found in ``detail_record``.

        ``old_record`` is not used: because the detail step yields several
        records, the duplicate check is done here, per inspection, by looking
        for an item with the same inspection date and facility id.
        """
        for inspection in detail_record:
            # Since parse_detail emits more than one record, we check for
            # existing records here rather than in self.existing_record()
            try:
                same_day = NewsItem.objects.filter(
                    schema__id=self.schema.id,
                    item_date=inspection['inspection_date'])
                same_day.by_attribute(
                    self.schema_fields['facility_id'], list_record['facid'])[0]
                # NOTE(review): this leaves save() entirely, so inspections
                # later in detail_record are not saved either -- confirm that
                # this is intended rather than skipping just this one.
                return None
            except IndexError:
                # Nothing stored yet for this facility on that date.
                pass

            type_name = inspection['inspection_type']
            type_lookup = self.get_or_create_lookup(
                'inspection_type', type_name, type_name, make_text_slug=False)
            violation_lookups = [
                self.get_or_create_lookup(
                    'violations', text, text, make_text_slug=False)
                for text in inspection['violations']
            ]
            attributes = {
                'name': list_record['name'],
                'inspection_type': type_lookup.id,
                # Lookup ids are stored as one comma-separated string.
                'violations': ','.join(
                    str(lookup.id) for lookup in violation_lookups),
                'facility_id': list_record['facid'],
            }
            self.create_newsitem(
                attributes,
                title=smart_title(list_record['name']),
                url=self.detail_uri % list_record['facid'],
                item_date=inspection['inspection_date'],
                location_name=clean_address(list_record['location'])
            )
Esempio n. 5
0
 def clean_list_record(self, record):
     """Normalize a raw record in place and return it.

     Raises SkipRecord when the upper-cased, stripped county is not one of
     ``self.counties``.
     """
     county = record['county'].upper().strip()
     if county not in self.counties:
         raise SkipRecord('Record not in %s.' % self.counties)

     # Each field is replaced by its cleaned value, in this order.
     cleaners = (
         ('activity_date', lambda raw: parse_date(raw, '%m/%d/%Y')),
         ('dba', smart_title),
         ('address', clean_address),
     )
     for field, clean in cleaners:
         record[field] = clean(record[field])
     return record
Esempio n. 6
0
 def clean_list_record(self, record):
     """Normalize a raw record in place and return it.

     Raises SkipRecord when the upper-cased, stripped county is not one of
     ``self.counties``.
     """
     county = record['county'].upper().strip()
     if county not in self.counties:
         raise SkipRecord('Record not in %s.' % self.counties)

     # Each field is replaced by its cleaned value, in this order.
     cleaners = (
         ('activity_date', lambda raw: parse_date(raw, '%m/%d/%Y')),
         ('dba', smart_title),
         ('address', clean_address),
     )
     for field, clean in cleaners:
         record[field] = clean(record[field])
     return record
Esempio n. 7
0
    def clean_list_record(self, record):
        """Normalize one raw list record in place and return it.

        The three date fields go through ``parse_date``, the address fields
        are stripped, ``NARRATIVE`` is replaced by ``TIME`` and
        ``DISPOSITION``, ``location_name`` is derived from ``ADDRESS_TYPE``
        and the broad/detail categories are looked up in
        ``CUSTOM_CATEGORIES``.

        Raises AttributeError when ``NARRATIVE`` does not match the expected
        "Time: ...<br>...<br>Disposition: ..." layout, because the failed
        search yields None.
        """
        for date_field in ('CALL_DATE', 'OFFENSE_DATE', 'REPORT_DATE'):
            record[date_field] = parse_date(record[date_field], '%Y-%m-%d')
        for text_field in ('COMMON_LOCATION', 'ADDRESS_NBR', 'ADDRESS'):
            record[text_field] = record[text_field].strip()

        # NARRATIVE bundles the time and the disposition. It is removed from
        # the record and replaced by the two named groups.
        narrative = record.pop('NARRATIVE')
        parsed = re.search(
            r'^Time: (?P<TIME>\d\d?:\d\d)<br>.*?<br>Disposition: (?P<DISPOSITION>.*)$',
            narrative)
        record.update(parsed.groupdict())
        record['TIME'] = parse_time(record['TIME'], '%H:%M')

        # Build location_name; each ADDRESS_TYPE has its own layout and
        # anything unrecognized (or without usable data) stays 'Unknown'.
        kind = record['ADDRESS_TYPE']
        number = record['ADDRESS_NBR']
        street = record['ADDRESS']
        landmark = record['COMMON_LOCATION']
        location = 'Unknown'
        if kind == 'PREMISE ADDRESS':
            location = '%s block of %s' % (number, clean_address(street))
        elif kind == 'INTERSECTION':
            if '/' in street:
                # Only the first two streets of the intersection are used.
                crossing = street.split('/')
                location = '%s and %s' % (
                    clean_address(crossing[0]), clean_address(crossing[1]))
            else:
                location = clean_address(street)
        elif kind == 'GEO-OVERRIDE':
            location = clean_address(street)
        elif kind == 'COMMON LOCATION':
            if street:
                if number:
                    location = '%s %s' % (number, clean_address(street))
                elif landmark:
                    location = '%s (%s)' % (
                        clean_address(street), clean_address(landmark))
                else:
                    location = clean_address(street)
            elif landmark:
                location = clean_address(landmark)
        record['location_name'] = location

        # Crime types without a custom mapping fall back to 'Unknown'.
        try:
            categories = CUSTOM_CATEGORIES[record['ORIG_CRIMETYPE_NAME']]
        except KeyError:
            categories = ('Unknown', 'Unknown')
        record['broad_category'], record['detail_category'] = categories

        return record
Esempio n. 8
0
 def save(self, old_record, list_record, detail_record):
     """Create a news item for ``list_record`` or update ``old_record``.

     ``detail_record`` is not used. The location name is the stripped
     street and the city joined as "street, city" and passed through
     ``clean_address``.
     """
     values = {'title': self.get_title(list_record)}
     values['item_date'] = list_record['approval_date']
     street_and_city = '%s, %s' % (
         list_record['street'].strip(), list_record['city'])
     values['location_name'] = clean_address(street_and_city)

     attributes = self.get_attributes(list_record)
     if old_record is not None:
         self.update_existing(old_record, values, attributes)
     else:
         self.create_newsitem(attributes, **values)
Esempio n. 9
0
 def save(self, old_record, list_record, detail_record):
     """Create a news item for ``list_record`` or update ``old_record``.

     ``detail_record`` is not used. The location name is the stripped
     street and the city joined as "street, city" and passed through
     ``clean_address``.
     """
     values = {'title': self.get_title(list_record)}
     values['item_date'] = list_record['approval_date']
     street_and_city = '%s, %s' % (
         list_record['street'].strip(), list_record['city'])
     values['location_name'] = clean_address(street_and_city)

     attributes = self.get_attributes(list_record)
     if old_record is not None:
         self.update_existing(old_record, values, attributes)
     else:
         self.create_newsitem(attributes, **values)
Esempio n. 10
0
 def save(self, old_record, list_record, detail_record):
     """Create a news item for ``list_record`` or update ``old_record``.

     ``detail_record`` is not used. The location name is the stripped
     street and the city joined as "street, city" and passed through
     ``clean_address``.
     """
     values = {"title": self.get_title(list_record)}
     values["item_date"] = list_record["approval_date"]
     street_and_city = "%s, %s" % (
         list_record["street"].strip(), list_record["city"])
     values["location_name"] = clean_address(street_and_city)

     attributes = self.get_attributes(list_record)
     if old_record is not None:
         self.update_existing(old_record, values, attributes)
     else:
         self.create_newsitem(attributes, **values)
Esempio n. 11
0
    def clean_list_record(self, record):
        """Normalize a raw application record in place and return it.

        Raises SkipRecord when the record's city is not in
        ``self.city_names``.
        """
        date_format = '%m/%d/%Y'
        record['application_date'] = parse_date(
            record['application_date'], date_format)

        # The review date is sometimes 'n/a'; a ValueError from parse_date
        # is recorded as None.
        try:
            review_date = parse_date(record['review_date'], date_format)
        except ValueError:
            review_date = None
        record['review_date'] = review_date

        record['address'] = strip_unit(clean_address(record['address']))

        city = record['city']
        if city not in self.city_names:
            raise SkipRecord('Skipping city %s' % city)
        record['city'] = city.title()
        return record
Esempio n. 12
0
    def clean_list_record(self, record):
        """Normalize a raw application record in place and return it.

        Raises SkipRecord when the record's city is not in
        ``self.city_names``.
        """
        date_format = '%m/%d/%Y'
        record['application_date'] = parse_date(
            record['application_date'], date_format)

        # The review date is sometimes 'n/a'; a ValueError from parse_date
        # is recorded as None.
        try:
            review_date = parse_date(record['review_date'], date_format)
        except ValueError:
            review_date = None
        record['review_date'] = review_date

        record['address'] = strip_unit(clean_address(record['address']))

        city = record['city']
        if city not in self.city_names:
            raise SkipRecord('Skipping city %s' % city)
        record['city'] = city.title()
        return record
Esempio n. 13
0
 def save(self, old_record, list_record, detail_record):
     """Create a news item for a bike rack or update ``old_record``.

     ``detail_record`` is not used. The title names the place when the
     record has a ``place_name`` key and the cleaned address otherwise.
     """
     location = clean_address(list_record['address'])
     attributes = dict(
         place_name=list_record.get('place_name', ''),
         rack_id=list_record['rack_id'],
         rack_count=list_record['rack_count'],
     )
     nearby = list_record.get('place_name', location)
     values = dict(
         title='Bike rack installed near %s' % nearby,
         item_date=list_record['installation_date'],
         location_name=location,
         url=list_record['url'],
     )
     if old_record is not None:
         self.update_existing(old_record, values, attributes)
     else:
         self.create_newsitem(attributes, **values)
Esempio n. 14
0
 def save(self, old_record, list_record, detail_record):
     """Create a news item for a bike rack or update ``old_record``.

     ``detail_record`` is not used. The title names the place when the
     record has a ``place_name`` key and the cleaned address otherwise.
     """
     location = clean_address(list_record['address'])
     attributes = dict(
         place_name=list_record.get('place_name', ''),
         rack_id=list_record['rack_id'],
         rack_count=list_record['rack_count'],
     )
     nearby = list_record.get('place_name', location)
     values = dict(
         title='Bike rack installed near %s' % nearby,
         item_date=list_record['installation_date'],
         location_name=location,
         url=list_record['url'],
     )
     if old_record is not None:
         self.update_existing(old_record, values, attributes)
     else:
         self.create_newsitem(attributes, **values)
Esempio n. 15
0
    def clean_list_record(self, record):
        """Normalize a raw call record in place and return it.

        Adds ``raw_address``, ``item_date`` and ``item_time`` and cleans
        ``address``, ``disposition`` and ``event``.
        """
        def unescape(text):
            # Undo the two HTML entities that appear in the scraped fields.
            return text.replace('&amp;', '&').replace('&nbsp;', ' ')

        # Save the raw address so we can use it to find duplicate records in
        # the future.
        raw_address = smart_title(unescape(record['address'].strip())).strip()
        record['raw_address'] = raw_address
        record['address'] = address_to_block(clean_address(raw_address))

        # An empty disposition is reported as 'Not available'.
        record['disposition'] = (
            unescape(record['disposition']).strip() or 'Not available')
        record['event'] = unescape(record['event']).strip()

        # One timestamp is split into separate date and time values.
        moment = parse_date(
            record['datetime'], '%m/%d/%Y %I:%M:%S %p', return_datetime=True)
        record['item_date'] = moment.date()
        record['item_time'] = moment.time()

        # Normalize this value.
        if record['disposition'] == 'CANCCOMM':
            record['disposition'] = 'CANCELLED BY COMMUNICATIONS'

        return record
Esempio n. 16
0
    def clean_list_record(self, record):
        """Normalize a raw inspection list record in place and return it.

        Raises SkipRecord when ``last_inspection_date`` is 'not available'
        (compared case-insensitively). Raises IndexError when a non-empty
        ``aka`` value yields no ``list_aka_re`` match.
        """
        inspected = record['last_inspection_date']
        if inspected.lower() == 'not available':
            raise SkipRecord('No inspection available')
        record['last_inspection_date'] = parse_date(inspected, '%m/%d/%Y')

        # Keep the first list_aka_re match; a falsy value becomes ''.
        aka = record['aka']
        record['aka'] = list_aka_re.findall(aka)[0] if aka else ''

        norm_dict_space(record, 'name', 'dba', 'address')
        record['result'] = record['result'].replace('&nbsp;', '').strip()
        record['city_id'] = int(record['city_id'])

        # Remove the trailing ZIP code from the address, if it exists.
        address = record['address']
        trailing_zip = re.search(r'(.*?)\s+\d\d\d\d\d$', address)
        if trailing_zip:
            address = trailing_zip.group(1)
        record['address'] = clean_address(address)

        return record
Esempio n. 17
0
    def clean_list_record(self, record):
        """Normalize a raw record in place and return it.

        Raises SkipRecord when the city is not Chicago (compared stripped and
        case-insensitively).
        """
        if record["City"].strip().upper() != "CHICAGO":
            raise SkipRecord

        # 'UNKNOWN' amounts become None; otherwise the ".00", "$" and ","
        # substrings are removed, in that order.
        amount = record["Amount"]
        if amount.strip().upper() == "UNKNOWN":
            record["Amount"] = None
        else:
            for noise in (".00", "$", ","):
                amount = amount.replace(noise, "")
            record["Amount"] = amount

        for date_field in ("Executed", "Recorded"):
            record[date_field] = parse_date(record[date_field], "%m/%d/%Y")

        cleaned = clean_address(record["Address"])
        record["clean_address"] = cleaned

        # Empty and "MANY" unit values are treated as having no unit.
        raw_unit = record["Unit #"]
        unit = raw_unit if raw_unit not in ("", "MANY") else None
        unit_suffix = ", unit " + unit if unit else ""
        record["clean_address_with_unit"] = "%s%s" % (cleaned, unit_suffix)

        # The document number column appears under either of two headers.
        if "Doc Number" in record:
            record["doc_number"] = record["Doc Number"]
        else:
            record["doc_number"] = record["Doc #"]

        return record
Esempio n. 18
0
    def clean_list_record(self, record):
        """Normalize a raw record in place and return it.

        Raises SkipRecord when the city is not Chicago (compared stripped and
        case-insensitively).
        """
        if record['City'].strip().upper() != 'CHICAGO':
            raise SkipRecord

        # 'UNKNOWN' amounts become None; otherwise the ".00", "$" and ","
        # substrings are removed, in that order.
        amount = record['Amount']
        if amount.strip().upper() == 'UNKNOWN':
            record['Amount'] = None
        else:
            for noise in ('.00', '$', ','):
                amount = amount.replace(noise, '')
            record['Amount'] = amount

        for date_field in ('Executed', 'Recorded'):
            record[date_field] = parse_date(record[date_field], '%m/%d/%Y')

        cleaned = clean_address(record['Address'])
        record['clean_address'] = cleaned

        # Empty and 'MANY' unit values are treated as having no unit.
        raw_unit = record['Unit #']
        unit = raw_unit if raw_unit not in ('', 'MANY') else None
        unit_suffix = ', unit ' + unit if unit else ''
        record['clean_address_with_unit'] = '%s%s' % (cleaned, unit_suffix)

        # The document number column appears under either of two headers.
        if 'Doc Number' in record:
            record['doc_number'] = record['Doc Number']
        else:
            record['doc_number'] = record['Doc #']

        return record
Esempio n. 19
0
    def clean_list_record(self, record):
        """Normalize a raw inspection list record in place and return it.

        Raises SkipRecord when ``last_inspection_date`` is 'not available'
        (compared case-insensitively). Raises IndexError when a non-empty
        ``aka`` value yields no ``list_aka_re`` match.
        """
        inspected = record['last_inspection_date']
        if inspected.lower() == 'not available':
            raise SkipRecord('No inspection available')
        record['last_inspection_date'] = parse_date(inspected, '%m/%d/%Y')

        # Keep the first list_aka_re match; a falsy value becomes ''.
        aka = record['aka']
        record['aka'] = list_aka_re.findall(aka)[0] if aka else ''

        norm_dict_space(record, 'name', 'dba', 'address')
        record['result'] = record['result'].replace('&nbsp;', '').strip()
        record['city_id'] = int(record['city_id'])

        # Remove the trailing ZIP code from the address, if it exists.
        address = record['address']
        trailing_zip = re.search(r'(.*?)\s+\d\d\d\d\d$', address)
        if trailing_zip:
            address = trailing_zip.group(1)
        record['address'] = clean_address(address)

        return record
Esempio n. 20
0
    def save(self, old_record, list_record, detail_record):
        """Create a news item for each inspection found in ``detail_record``.

        ``old_record`` is not used: because the detail step yields several
        records, the duplicate check is done here, per inspection, by looking
        for an item with the same inspection date and facility id.
        """
        for inspection in detail_record:
            # Since parse_detail emits more than one record, we check for
            # existing records here rather than in self.existing_record()
            try:
                same_day = NewsItem.objects.filter(
                    schema__id=self.schema.id,
                    item_date=inspection['inspection_date'])
                same_day.by_attribute(
                    self.schema_fields['facility_id'], list_record['facid'])[0]
                # NOTE(review): this leaves save() entirely, so inspections
                # later in detail_record are not saved either -- confirm that
                # this is intended rather than skipping just this one.
                return None
            except IndexError:
                # Nothing stored yet for this facility on that date.
                pass

            type_name = inspection['inspection_type']
            type_lookup = self.get_or_create_lookup(
                'inspection_type', type_name, type_name, make_text_slug=False)
            violation_lookups = [
                self.get_or_create_lookup(
                    'violations', text, text, make_text_slug=False)
                for text in inspection['violations']
            ]
            attributes = {
                'name': list_record['name'],
                'inspection_type': type_lookup.id,
                # Lookup ids are stored as one comma-separated string.
                'violations': ','.join(
                    str(lookup.id) for lookup in violation_lookups),
                'facility_id': list_record['facid'],
            }
            self.create_newsitem(
                attributes,
                title=smart_title(list_record['name']),
                url=self.detail_uri % list_record['facid'],
                item_date=inspection['inspection_date'],
                location_name=clean_address(list_record['location'])
            )
Esempio n. 21
0
    def clean_list_record(self, record):
        """Normalize a raw license record in place and return it.

        Raises SkipRecord when the ``dba`` is listed in ``SKIPPED_DBAS`` or
        when the structure is 'INDIVIDUAL'.
        """
        # Drop markup from every value and collapse whitespace runs.
        for field in list(record):
            text = strip_tags(record[field])
            record[field] = re.sub(r'(?s)\s\s+', ' ', text).strip()

        # Remove the "Suite/Apt" or "Floor" clause from the address, if it exists.
        address = record['address'].replace(' ,', ',')
        clause = re.search(r'^(.*?), (?:Suite/Apt|Floor):.*$', address)
        if clause:
            address = clause.group(1)
        record['address'] = clean_address(address)

        dba = record['dba']
        if dba in SKIPPED_DBAS:
            raise SkipRecord('Skipping %r' % dba)

        # For privacy reasons, skip individuals.
        if record['structure'].upper().strip() == 'INDIVIDUAL':
            raise SkipRecord('Skipping structure=individual')

        for id_field in ('city_id', 'site_id'):
            record[id_field] = int(record[id_field])
        return record
Esempio n. 22
0
    def clean_list_record(self, record):
        """Normalize a raw license record in place and return it.

        Raises SkipRecord when the ``dba`` is listed in ``SKIPPED_DBAS`` or
        when the structure is 'INDIVIDUAL'.
        """
        # Drop markup from every value and collapse whitespace runs.
        for field in list(record):
            text = strip_tags(record[field])
            record[field] = re.sub(r'(?s)\s\s+', ' ', text).strip()

        # Remove the "Suite/Apt" or "Floor" clause from the address, if it exists.
        address = record['address'].replace(' ,', ',')
        clause = re.search(r'^(.*?), (?:Suite/Apt|Floor):.*$', address)
        if clause:
            address = clause.group(1)
        record['address'] = clean_address(address)

        dba = record['dba']
        if dba in SKIPPED_DBAS:
            raise SkipRecord('Skipping %r' % dba)

        # For privacy reasons, skip individuals.
        if record['structure'].upper().strip() == 'INDIVIDUAL':
            raise SkipRecord('Skipping structure=individual')

        for id_field in ('city_id', 'site_id'):
            record[id_field] = int(record[id_field])
        return record
Esempio n. 23
0
    def clean_list_record(self, record):
        """Normalize a raw foreclosure record in place and return it.

        Raises SkipRecord (after logging) when ``filing_dat`` cannot be turned
        into a date, i.e. ``parse_date`` raises ValueError or returns None.
        """
        strip_dict(record)

        try:
            filing_date = parse_date(str(int(record['filing_dat'])), '%m%d%y')
        except ValueError:
            filing_date = None
        record['filing_date'] = filing_date
        if filing_date is None:
            self.logger.info('Skipping invalid filing date %r',
                             record['filing_dat'])
            raise SkipRecord

        record['address'] = clean_address(record.pop('address'))

        # Rename the awkward source columns to friendlier keys. The pin_number
        # entry keeps its name but still requires the key to be present.
        renames = (
            ('case_#', 'case_number'),
            ('document_#', 'document_number'),
            ('pin_number', 'pin_number'),
        )
        for source_key, target_key in renames:
            record[target_key] = record.pop(source_key)

        # Values without a ``year`` attribute are reported as 'Unknown'.
        mortgage_date = record.pop('year_of_mo')
        try:
            record['year_of_mortgage'] = str(mortgage_date.year)
        except AttributeError:
            record['year_of_mortgage'] = 'Unknown'

        # Normalize inconsistent headers
        for variant, canonical in (('SF', 'sf'), ('SMF', 'smf'),
                                   ('Condo', 'condo')):
            if variant in record:
                record[canonical] = record.pop(variant)

        # The first non-zero flag, checked in this order, decides the type.
        type_flags = (
            ('sf', 'Single family'),
            ('smf', 'Multi-unit'),
            ('condo', 'Condo'),
        )
        for flag_key, label in type_flags:
            if int(record[flag_key]):
                record['property_type'] = label
                break
        else:
            record['property_type'] = 'Unknown'

        return record
Esempio n. 24
0
    def clean_list_record(self, record):
        """Normalize a raw record in place and return it.

        Raises SkipRecord when the city is not Chicago (compared stripped and
        case-insensitively).
        """
        if record['City'].strip().upper() != 'CHICAGO':
            raise SkipRecord

        # 'UNKNOWN' amounts become None; otherwise the ".00", "$" and ","
        # substrings are removed, in that order.
        amount = record['Amount']
        if amount.strip().upper() == 'UNKNOWN':
            record['Amount'] = None
        else:
            for noise in ('.00', '$', ','):
                amount = amount.replace(noise, '')
            record['Amount'] = amount

        for date_field in ('Executed', 'Recorded'):
            record[date_field] = parse_date(record[date_field], '%m/%d/%Y')

        cleaned = clean_address(record['Address'])
        record['clean_address'] = cleaned

        # Empty and 'MANY' unit values are treated as having no unit.
        raw_unit = record['Unit #']
        unit = raw_unit if raw_unit not in ('', 'MANY') else None
        unit_suffix = ', unit ' + unit if unit else ''
        record['clean_address_with_unit'] = '%s%s' % (cleaned, unit_suffix)

        # The document number column appears under either of two headers.
        if 'Doc Number' in record:
            record['doc_number'] = record['Doc Number']
        else:
            record['doc_number'] = record['Doc #']

        return record
Esempio n. 25
0
    def clean_list_record(self, record):
        """Normalize a raw inspection record in place and return it.

        Raises SkipRecord when the record's city is not in
        ``self.city_names``.
        """
        # Collapse the 58 per-violation columns (vio1..vio58), most of which
        # are zero, into a list of (violation number, count) pairs.
        counts = [(str(number), int(record.pop('vio%s' % number)))
                  for number in range(1, 59)]
        record['violations'] = [pair for pair in counts if pair[1]]

        record['inspection_date'] = parse_date(record['inspection_date'], '%m/%d/%Y')
        record['address'] = clean_address(record['address'])

        city = record['city']
        if city not in self.city_names:
            raise SkipRecord('Skipping city %s' % city)
        record['city'] = city.title()

        # The remaining counters and identifiers are converted to integers.
        integer_fields = (
            'visit_number',
            'critical_violations',
            'noncritical_violations',
            'total_violations',
            'inspection_number',
        )
        for field in integer_fields:
            record[field] = int(record[field])

        return record
Esempio n. 26
0
def clean_washington_address(add, city):
    """Return ``(cleaned address, cleaned address without unit)``.

    Non-breaking spaces are replaced with regular spaces and a trailing
    ", <city>, WA..." suffix is removed before the address is passed through
    ``clean_address``. The second element is that same address passed
    through ``strip_unit`` and stripped of surrounding whitespace.

    ``city`` is matched literally: it is escaped before being placed in the
    pattern, so a name containing regex metacharacters (".", "(", "+", ...)
    neither matches unintended text nor raises ``re.error``.
    """
    add = add.replace(u'\xa0', ' ')
    suffix_pattern = r',\s+%s,\s+WA.*$' % re.escape(city)
    add = re.sub(suffix_pattern, '', add)
    add = clean_address(add)
    return add, strip_unit(add).strip()
Esempio n. 27
0
def clean_washington_address(add, city):
    """Return ``(cleaned address, cleaned address without unit)``.

    Non-breaking spaces are replaced with regular spaces and a trailing
    ", <city>, WA..." suffix is removed before the address is passed through
    ``clean_address``. The second element is that same address passed
    through ``strip_unit`` and stripped of surrounding whitespace.

    ``city`` is matched literally: it is escaped before being placed in the
    pattern, so a name containing regex metacharacters (".", "(", "+", ...)
    neither matches unintended text nor raises ``re.error``.
    """
    add = add.replace(u'\xa0', ' ')
    suffix_pattern = r',\s+%s,\s+WA.*$' % re.escape(city)
    add = re.sub(suffix_pattern, '', add)
    add = clean_address(add)
    return add, strip_unit(add).strip()