def save(self, old_record, list_record, detail_record):
    """Create a news item for a sign permit record not seen before.

    Returns immediately when ``old_record`` is not None, so existing items
    are never updated here. ``detail_record`` is not used.
    """
    if old_record is not None:
        return

    status_code = list_record['Job Status']
    job_status = self.get_or_create_lookup(
        'job_status', JOB_STATUS_NAMES[status_code], status_code)

    illumination_name = list_record['illumination_type']
    illumination_type = self.get_or_create_lookup(
        'illumination_type', illumination_name, illumination_name)

    sign_type = list_record['Sign Type']
    sign_location = self.get_or_create_lookup(
        'sign_location', smart_title(sign_type), sign_type)

    sign_for_code = list_record['sign_for']
    sign_for = self.get_or_create_lookup(
        'sign_for', smart_title(sign_for_code), sign_for_code)

    # Assemble the three variable pieces of the headline.
    verb = JOB_STATUS_HEADLINE_VERBS[status_code]
    if list_record['Sign Illumination'] == 'Y':
        lighting = 'illuminated'
    else:
        lighting = 'unilluminated'
    if list_record['sign_for'] == 'BUSINESS':
        purpose = 'business'
    else:
        purpose = 'advertising'
    title = 'Permit application %s for an %s %s sign' % (verb, lighting, purpose)

    attributes = {
        'bin': list_record['Bin #'],
        'job_number': list_record['Job #'],
        'job_status': job_status.id,
        'is_landmark': list_record['is_landmark'],
        'is_adult_establishment': list_record['is_adult_establishment'],
        'is_city_owned': list_record['is_city_owned'],
        'is_changeable_copy': list_record['is_changeable_copy'],
        'estimated_cost': list_record['Initial Cost'],
        'illumination_type': illumination_type.id,
        'sign_location': sign_location.id,
        'size': list_record['Sign SQ Footage'],
        'sign_for': sign_for.id,
        'sign_text': list_record['sign_text'],
        'job_description': list_record['Job Description 1'],
    }

    # The latest action date serves as both publication date and item date.
    action_date = list_record['Latest Action Date']
    self.create_newsitem(
        attributes,
        title=title,
        pub_date=action_date,
        item_date=action_date,
        location_name=list_record['address'],
    )
def clean_list_record(self, record):
    """Normalize the date, text fields and 'Type' of a list record in place.

    The date is parsed with a four-digit year first and a two-digit year as
    a fallback. 'Location', 'Notes', 'Title' and 'Type' are stripped, or set
    to '' when missing or empty. Returns the same ``record`` object.
    """
    raw_date = record['Date']
    try:
        record['Date'] = parse_date(raw_date, '%m/%d/%Y')  # 12/31/2007
    except ValueError:
        record['Date'] = parse_date(raw_date, '%m/%d/%y')  # 12/31/07

    for field in ('Location', 'Notes', 'Title', 'Type'):
        value = record[field] if field in record else None
        record[field] = value.strip() if value else ''

    record['Location'] = smart_title(record['Location'])
    record['Title'] = smart_title(record['Title'])

    type_fixes = {
        # This is temporary! The CSV files we get are inconsistent --
        # sometimes they're only films and don't have a "Type" field.
        '': 'Film',
        # Normalize inconsistent data.
        'Stills': 'Still photography',
        'Still': 'Still photography',
        'Fim': 'Film',
        'Movie': 'Film',
    }
    record['Type'] = type_fixes.get(record['Type'], record['Type'])
    return record
def clean_list_record(self, record):
    """Parse the inspection date and title-case the text fields in place.

    'inspection_date' is parsed as month/day/four-digit-year; 'address',
    'restaurant_name' and 'result' are passed through smart_title().
    Returns the same ``record`` object.
    """
    record['inspection_date'] = parse_date(record['inspection_date'], '%m/%d/%Y')
    for field in ('address', 'restaurant_name', 'result'):
        record[field] = smart_title(record[field])
    return record
def verbose_detail(self, detail):
    """Return a one-line description of a location detail dictionary.

    ``detail`` must contain 'cross_street_1' and 'cross_street_2'. When it
    also contains 'street_name' the result reads
    '<street> from <cross 1> to <cross 2>'; otherwise it reads
    'Intersection of <cross 1> and <cross 2>'.
    """
    clean_cross_1 = smart_title(detail['cross_street_1'])
    clean_cross_2 = smart_title(detail['cross_street_2'])
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
    if 'street_name' in detail:
        clean_street_name = smart_title(detail['street_name'])
        return '%s from %s to %s' % (clean_street_name, clean_cross_1, clean_cross_2)
    return 'Intersection of %s and %s' % (clean_cross_1, clean_cross_2)
def clean_list_record(self, record):
    """Add cleaned street names and parsed dates to a permit record in place.

    Adds 'clean_street_name', 'clean_cross_1' and 'clean_cross_2', rewrites
    'Permit Reason' with only its first letter capitalized, and parses the
    'Effective Date' and 'Expiration Date' values. Returns the same
    ``record`` object.
    """
    record['clean_street_name'] = smart_title(remove_leading_zero(record['streetname']))
    for source, target in (('Cross Street 1', 'clean_cross_1'),
                           ('Cross Street 2', 'clean_cross_2')):
        # The backslash is escaped explicitly: a bare ' \ ' relies on an
        # invalid escape sequence, which newer Pythons warn about.
        slashed = record[source].replace(' \\ ', ' / ')
        record[target] = smart_title(remove_leading_zero(slashed))
    reason = capfirst(record['Permit Reason'].lower())
    record['Permit Reason'] = reason.replace('Cut off service', 'Cut-off service')
    for date_field in ('Effective Date', 'Expiration Date'):
        record[date_field] = parse_date(record[date_field], '%Y-%m-%d %H:%M:%S')
    return record
def verbose_detail(detail):
    """Return a one-line description of a location detail dictionary.

    ``detail`` must contain "cross_street_1" and "cross_street_2". When it
    also contains "street_name" the result reads
    "<street> from <cross 1> to <cross 2>"; otherwise it reads
    "Intersection of <cross 1> and <cross 2>".
    """
    clean_cross_1 = smart_title(detail["cross_street_1"])
    clean_cross_2 = smart_title(detail["cross_street_2"])
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
    if "street_name" in detail:
        clean_street_name = smart_title(detail["street_name"])
        return "%s from %s to %s" % (clean_street_name, clean_cross_1, clean_cross_2)
    return "Intersection of %s and %s" % (clean_cross_1, clean_cross_2)
def clean_list_record(self, record):
    """Derive the cleaned sign-permit fields from a raw record, in place.

    Adds 'address', the 'is_*' flags, 'illumination_type', 'sign_text' and
    'sign_for', then returns the same ``record`` object.

    Raises SkipRecord when 'Latest Action Date' is not a datetime, when the
    sign text is longer than 255 characters, or when a non-string sign text
    makes int() raise TypeError.
    """
    record['address'] = '%s %s, %s' % (
        record['House #'],
        smart_title(record['Street Name']),
        smart_title(record['Borough']),
    )
    for flag, column in (('is_landmark', 'Landmark'),
                         ('is_adult_establishment', 'Adult Estab'),
                         ('is_city_owned', 'City Owned')):
        record[flag] = record[column] == 'Y'
    # A missing column yields 'Not available'; a present but empty value
    # yields 'Not illuminated'.
    record['illumination_type'] = (
        record.get('Sign Illumination Type', 'Not available')
        or 'Not illuminated')

    action_date = record['Latest Action Date']
    if not isinstance(action_date, datetime.datetime):
        self.logger.info('Skipping job #%s, with latest action date %s',
                         record.get('Job #'), action_date)
        raise SkipRecord()

    raw_text = record['Text on Sign']
    try:
        record['sign_text'] = raw_text.strip()
        if len(record['sign_text']) > 255:
            # Some records are malformed and have a bad and long value
            # for sign text.
            self.logger.info('Skipping job #%s, with Text on Sign %s',
                             record.get('Job #'), raw_text)
            raise SkipRecord()
    except AttributeError:
        # The value has no strip(); try to treat it as a number instead.
        try:
            record['sign_text'] = str(int(raw_text))
        except TypeError:
            self.logger.info('Skipping job #%s, with Text on Sign %s',
                             record.get('Job #'), raw_text)
            raise SkipRecord()

    if 'Sign Advertising' in record:
        record['sign_for'] = record['Sign Advertising']
    else:
        record['sign_for'] = record['Usage']

    if 'Sign Near Highway' in record:
        record['is_near_highway'] = record['Sign Near Highway'] == 'Y'
    else:
        # Older spreadsheets don't have a 'Sign Near Highway' column,
        # and there's nothing we can do about it. They have a column called
        # 'Adjacent to Arterial Highway', but that's not necessarily the
        # same thing.
        record['is_near_highway'] = None

    if 'Sign Changeable Copy' in record:
        record['is_changeable_copy'] = record['Sign Changeable Copy'] == 'Y'
    elif record['sign_text']:
        # Older spreadsheets don't have a 'Sign Changeable Copy' column,
        # but we can deduce the value: if there's text, then it's not
        # changeable. Otherwise, it's NULL.
        record['is_changeable_copy'] = False
    else:
        record['is_changeable_copy'] = None
    return record
def clean_list_record(self, record):
    """Add 'address' and the three 'is_*' flags to a record in place.

    Returns the same ``record`` object. Raises SkipRecord when
    'Latest Action Date' is neither a date nor a datetime.
    """
    street = smart_title(record['Street Name'])
    borough = smart_title(record['Borough'])
    record['address'] = '%s %s, %s' % (record['House #'], street, borough)

    for flag, column in (('is_landmark', 'Landmarked'),
                         ('is_adult_establishment', 'Adult Estab'),
                         ('is_city_owned', 'City Owned')):
        record[flag] = record[column] == 'Y'

    if isinstance(record['Latest Action Date'], (datetime.date, datetime.datetime)):
        return record
    raise SkipRecord('Got last action date %s' % record['Latest Action Date'])
def clean_list_record(self, record):
    """Add cleaned date, time, address, permit type and description fields.

    'issdttm' is parsed into 'issue_date' and 'issue_time'. 'description'
    comes from 'compute_0009' when that key exists, otherwise from
    'permit_description'. Returns the same ``record`` object.
    """
    issued = parse_date(record['issdttm'], '%m/%d/%Y %H:%M:%S', return_datetime=True)
    record['issue_date'] = issued.date()
    record['issue_time'] = issued.time()

    address_parts = (
        record['stno'],
        record['predir'],
        smart_title(record['stname']),
        smart_title(record['suffix']),
    )
    record['clean_address'] = '%s %s. %s %s.' % address_parts

    # Drop the leading "PERMIT - " marker before title-casing.
    permit_type = re.sub(r'^PERMIT - ', '', record['apdesc'])
    record['clean_permit_type'] = smart_title(permit_type)

    if 'compute_0009' in record:
        record['description'] = record['compute_0009']
    else:
        record['description'] = record['permit_description']
    return record
def clean_list_record(self, record):
    """Clean up a street-use permit record in place and return it.

    Parses 'Approved Date', adds 'clean_location', 'clean_cross_1' and
    'clean_cross_2', defaults an empty 'Agent' to 'Unknown agent' and an
    empty 'Permit Type' to 'N/A', and replaces 'Permit Type' with its entry
    in STREET_USE_PERMIT_TYPES when one exists.
    """
    record['Approved Date'] = parse_date(record['Approved Date'], '%Y-%m-%d %H:%M:%S')
    record['clean_location'] = smart_title(remove_leading_zero(record['Location'])).strip()
    for source, target in (('Cross Street 1', 'clean_cross_1'),
                           ('Cross Street 2', 'clean_cross_2')):
        # The backslash is escaped explicitly: a bare ' \ ' relies on an
        # invalid escape sequence, which newer Pythons warn about.
        slashed = record[source].replace(' \\ ', ' / ')
        record[target] = smart_title(remove_leading_zero(slashed))
    # An agent that is missing, empty or only whitespace becomes the
    # placeholder.
    record['Agent'] = (record['Agent'] and record['Agent'].strip()) or 'Unknown agent'
    record['Permit Type'] = record['Permit Type'] or 'N/A'
    try:
        record['Permit Type'] = STREET_USE_PERMIT_TYPES[record['Permit Type']]
    except KeyError:
        # Unknown permit types are kept as given.
        pass
    return record
def save(self, old_record, list_record, detail_record):
    """Create or update the news item for one license application.

    The record is silently dropped when it has no filing date, was filed
    before 2008-01-01, has an empty address, or names a city with no
    matching Location (the miss is counted in ``self.failed_cities``).
    Otherwise a new item is created when ``old_record`` is None, and the
    existing item is updated when it is not.
    """
    # Throw away records with no filing date. We need to draw a line
    # somewhere. That line is here.
    if detail_record['filing_date'] is None:
        return
    if detail_record['filing_date'] < datetime.date(2008, 1, 1):
        return
    if detail_record['address'] == '':
        return

    # If we can't find the city in the locations table, skip this record.
    try:
        loc = Location.objects.select_related().get(normalized_name=detail_record['city'])
        # If we have a neighborhood, the city should be the borough.
        if loc.location_type.slug == 'neighborhoods':
            detail_record['city'] = loc.city
    except Location.DoesNotExist:
        city = detail_record['city']
        self.failed_cities[city] = self.failed_cities.get(city, 0) + 1
        return

    status = self.get_or_create_lookup(
        'status', detail_record['License Status:'],
        detail_record['License Status:'], make_text_slug=False)
    license_type = self.get_or_create_lookup(
        'license_type', smart_title(detail_record['License Type:']),
        detail_record['License Type:'], make_text_slug=False)

    # The unused ``pretty_date`` local from the original was dropped here.
    premises_name = detail_record['premises_name']
    title = 'Application for %s' % premises_name
    item_date = detail_record['filing_date']
    location_name = smart_title("%s, %s" % (detail_record['address'], detail_record['city']))
    attributes = {
        'serial_number': list_record['serial_number'],
        'effective_date': detail_record['effective_date'],
        'expiration_date': detail_record['expiration_date'],
        'premises_name': premises_name,
        'status': status.id,
        'license_type': license_type.id,
    }
    if old_record is None:
        self.create_newsitem(
            attributes,
            title=title,
            item_date=item_date,
            location_name=location_name,
            url='http://www.trans.abc.state.ny.us' + list_record['url']
        )
    else:
        # This license already exists in our database, but it may have
        # changed status, so save any new values.
        new_values = {
            'title': title,
            'item_date': item_date,
            'location_name': location_name,
        }
        self.update_existing(old_record, new_values, attributes)
def clean_list_record(self, record):
    """Add cleaned category, price, year, address and borough fields in place.

    Values that int() rejects with ValueError become "N/A", as does a year
    built of 0. Returns the same ``record`` object; an unknown "BOROUGH"
    code raises KeyError.
    """
    # Strip extra internal whitespace.
    record["BUILDING CLASS CATEGORY"] = re.sub(r"\s+", " ", record["BUILDING CLASS CATEGORY"])
    # The first three characters of the category are discarded.
    category = record["BUILDING CLASS CATEGORY"][3:]
    record["category_name"] = capfirst(category.lower())

    try:
        record["sale_price"] = str(int(record["SALE PRICE"]))
    except ValueError:
        record["sale_price"] = "N/A"

    try:
        year_built = str(int(record["YEAR BUILT"]))
    except ValueError:
        year_built = "N/A"
    record["year_built"] = "N/A" if year_built == "0" else year_built

    # Only an address with exactly one ", " separator carries a unit.
    pieces = record["ADDRESS"].split(", ")
    if len(pieces) == 2:
        address, unit = pieces
    else:
        address, unit = record["ADDRESS"], ""
    address = smart_title(address)
    record["clean_address"] = address
    unit_suffix = ", " + unit if unit else ""
    record["clean_address_with_unit"] = "%s%s" % (address, unit_suffix)

    borough_names = {
        1: "Manhattan",
        2: "Bronx",
        3: "Brooklyn",
        4: "Queens",
        5: "Staten Island",
    }
    record["borough"] = borough_names[record["BOROUGH"]]
    return record
def render(self, context):
    """Store details of the item's overlapping locations in context[varname].

    For each slug in ``self.loctype_slugs`` that names an existing
    LocationType, at most one of the news item's locations of that type is
    described by a dictionary. The list of dictionaries is assigned to
    ``context[self.varname]`` and an empty string is returned.

    Raises template.TemplateSyntaxError when the resolved newsitem argument
    is not a dictionary.
    """
    item_context = self.newsitem_context_var.resolve(context)
    if not isinstance(item_context, dict):
        raise template.TemplateSyntaxError(
            "The newsitem argument to 'get_locations_for_item' tag must be a "
            "dictionary eg. as created by the template_context_for_item() function")

    # TODO: cache the LocationType lookup?
    loctype_rows = LocationType.objects.filter(slug__in=self.loctype_slugs)
    loctypes_by_slug = dict(
        (row['slug'], row) for row in loctype_rows.values('name', 'slug'))

    item_locations = item_context['_item'].location_set.all()
    overlapping = []
    for slug in self.loctype_slugs:
        loctype = loctypes_by_slug.get(slug)
        if loctype is None:
            continue
        # Assume there is at most one intersecting location of each type.
        # That will probably be wrong somewhere someday...
        # eg. neighborhoods with fuzzy borders.
        matches = list(item_locations.filter(location_type__slug=loctype['slug'])[:1])
        if not matches:
            continue
        first = matches[0]
        overlapping.append({
            'location_slug': first.slug,
            'location_type_slug': loctype['slug'],
            'location_type_name': smart_title(loctype['name'], ['ZIP']),
            'location_name': first.name,
        })
    context[self.varname] = overlapping
    return u''
def save(self, old_record, list_record, detail_records):
    """Create one news item per inspection record of a facility.

    The whole call returns None as soon as an inspection is found that
    already has an item for this facility on the same date. Inspections of
    type 'Consultation/Education - Field' are skipped. ``old_record`` is
    not used.
    """
    facility_id = list_record['facid']
    for inspection in detail_records:
        # Since parse_detail emits more than one record, we check for existing
        # records here rather than in self.existing_record()
        try:
            same_day = NewsItem.objects.filter(schema__id=self.schema.id,
                                               item_date=inspection['inspection_date'])
            # Indexing raises IndexError when there is no matching item.
            same_day.by_attribute(self.schema_fields['facility_id'], list_record['facid'])[0]
        except IndexError:
            pass
        else:
            return None

        if inspection['inspection_type'] == 'Consultation/Education - Field':
            continue

        inspection_type_lookup = self.get_or_create_lookup(
            'inspection_type', inspection['inspection_type'],
            inspection['inspection_type'], make_text_slug=False)
        violations_lookups = [
            self.get_or_create_lookup('violations', item['violation'],
                                      item['violation'], make_text_slug=False)
            for item in inspection['violations']
        ]
        attributes = {
            'name': list_record['name'],
            'inspection_type': inspection_type_lookup.id,
            'points': inspection['points'],
            'violations': ','.join([str(lookup.id) for lookup in violations_lookups]),
            'violations_json': DjangoJSONEncoder().encode(inspection['violations']),
            'facility_id': facility_id,
        }
        self.create_newsitem(
            attributes,
            title=list_record['name'],
            url='http://www.decadeonline.com/fac.phtml?agency=skc&forceresults=1&facid=%s' % list_record['facid'],
            item_date=inspection['inspection_date'],
            location_name=smart_title(list_record['location'])
        )
def convert_to_json():
    """Rewrite each excavation permit's 'location_details' attribute as JSON.

    The stored value is split on '___' into a comma-separated CNN list and a
    ';'-separated list of details, each repaired with repair_details(). The
    item's location name and description are rebuilt from those details and
    the item is saved. Progress is printed once per batch.
    """
    news_items = NewsItem.objects.filter(schema__slug='excavation-permits').order_by('id')
    for start, end, total, qs in queryset.batch(news_items):
        # Parenthesised single-argument form works as a Python 2 print
        # statement and as a Python 3 function call.
        print("processing %s to %s of %s" % (start + 1, end, total))
        for ni in qs:
            cnn_list, details = ni.attributes['location_details'].split('___')
            details = [repair_details(d) for d in details.split(';')]
            location_details = {
                'cnn_list': cnn_list.split(','),
                'details': details,
            }
            ni.attributes['location_details'] = simplejson.dumps(location_details)

            # Collect each distinct street once; intersections contribute
            # their first cross street.
            streets = set()
            for detail in details:
                # ``in`` replaces dict.has_key(), removed in Python 3.
                if 'street_name' in detail:
                    streets.add(detail['street_name'])
                else:
                    streets.add(detail['cross_street_1'])
            location_name = ', '.join([smart_title(street) for street in streets])
            if len(location_name) > 150:
                location_name = VARIOUS

            description = '; '.join([verbose_detail(loc) for loc in location_details['details']])
            ni.location_name = location_name
            ni.description = description
            ni.save()
def clean_list_record(self, record):
    """Add cleaned date, time, address, permit type and description fields.

    "issdttm" is parsed into "issue_date" and "issue_time". "description"
    comes from "compute_0009" when that key exists, otherwise from
    "permit_description". Returns the same ``record`` object.
    """
    stamp = parse_date(record["issdttm"], "%m/%d/%Y %H:%M:%S", return_datetime=True)
    record["issue_date"], record["issue_time"] = stamp.date(), stamp.time()

    number = record["stno"]
    direction = record["predir"]
    street = smart_title(record["stname"])
    suffix = smart_title(record["suffix"])
    record["clean_address"] = "%s %s. %s %s." % (number, direction, street, suffix)

    # Drop the leading "PERMIT - " marker before title-casing.
    bare_description = re.sub(r"^PERMIT - ", "", record["apdesc"])
    record["clean_permit_type"] = smart_title(bare_description)

    source_key = "compute_0009" if "compute_0009" in record else "permit_description"
    record["description"] = record[source_key]
    return record
def save(self, old_record, list_record, detail_record):
    """Create or update the news item for one offense record.

    Six lookups are fetched or created from ``list_record``. A new item is
    created when ``old_record`` is None; otherwise the existing item is
    updated. ``detail_record`` is not used.
    """
    def lookup(schema_field, source_key):
        # The same raw value serves as both the lookup's name and its code.
        return self.get_or_create_lookup(
            schema_field, list_record[source_key], list_record[source_key],
            make_text_slug=False)

    category = lookup('category', 'category')
    secondary_category = lookup('secondary_category', 'secondary_category')
    beat = lookup('beat', 'offensebeat')
    premises = lookup('premises', 'offensepremises')
    crime_type = lookup('crime_type', 'crime_type')
    secondary_crime_type = lookup('secondary_crime_type', 'secondary_crime_type')

    kwargs = {
        'title': smart_title(list_record['offensedescription']),
        'item_date': list_record['offensedate'],
        'location_name': list_record['address']
    }
    street_keys = ('offenseblock', 'offensedirection', 'offensestreet')
    ucr_keys = ('offenseucr1', 'offenseucr2')
    attributes = {
        'category': category.id,
        'secondary_category': secondary_category.id,
        'service_number': list_record['offenseservicenumber'],
        'offense_time': list_record['offensestarttime'],
        'description': list_record['offensedescription'],
        'beat': beat.id,
        'premises': premises.id,
        'crime_type': crime_type.id,
        'secondary_crime_type': secondary_crime_type.id,
        'method': list_record['offensemethodofoffense'],
        # street is block;direction;street
        # This will allow us to reprocess the original data when we
        # improve the address normalizer.
        'street': ';'.join([list_record[key] or '' for key in street_keys]),
        'ucr': ';'.join([list_record[key] or '' for key in ucr_keys])
    }
    if old_record is not None:
        self.update_existing(old_record, kwargs, attributes)
    else:
        self.create_newsitem(attributes, **kwargs)
def clean_list_record(self, record):
    """Add cleaned category, price, year, address and borough fields in place.

    Values that int() rejects with ValueError become 'N/A', as does a year
    built of 0. Returns the same ``record`` object; an unknown 'BOROUGH'
    code raises KeyError.
    """
    # Strip extra internal whitespace.
    collapsed = re.sub(r'\s+', ' ', record['BUILDING CLASS CATEGORY'])
    record['BUILDING CLASS CATEGORY'] = collapsed
    # The first three characters of the category are discarded.
    record['category_name'] = capfirst(collapsed[3:].lower())

    try:
        record['sale_price'] = str(int(record['SALE PRICE']))
    except ValueError:
        record['sale_price'] = 'N/A'

    try:
        year_built = str(int(record['YEAR BUILT']))
    except ValueError:
        year_built = 'N/A'
    if year_built == '0':
        year_built = 'N/A'
    record['year_built'] = year_built

    # Only an address with exactly one ', ' separator carries a unit.
    full_address = record['ADDRESS']
    parts = full_address.split(', ')
    if len(parts) != 2:
        parts = [full_address, '']
    address = smart_title(parts[0])
    unit = parts[1]
    record['clean_address'] = address
    if unit:
        record['clean_address_with_unit'] = '%s%s' % (address, ', ' + unit)
    else:
        record['clean_address_with_unit'] = '%s%s' % (address, '')

    boroughs = {1: 'Manhattan', 2: 'Bronx', 3: 'Brooklyn', 4: 'Queens', 5: 'Staten Island'}
    record['borough'] = boroughs[record['BOROUGH']]
    return record
def save(self, old_record, list_record, detail_record):
    """Create or update the news item for one inspection record.

    A new item is created when ``old_record`` is None; otherwise the
    existing item is updated. ``detail_record`` is not used.
    """
    # hashlib replaces the standalone ``md5`` module, which is deprecated
    # and absent from Python 3. Imported locally so this method does not
    # depend on the file's import block.
    import hashlib

    inspection_type = self.get_or_create_lookup(
        'inspection_type', list_record['inspection_type'], list_record['inspection_type'])
    result = self.get_or_create_lookup('result', list_record['result'], list_record['result'])

    # Make up a unique id so we can show other inspections at this
    # facility on the detail page.
    facility_key = '%s:%s' % (list_record['name'], list_record['address'])
    eb_facility_id = hashlib.md5(facility_key).hexdigest()

    json_data = {
        'zipcode': list_record['zipcode'],
        'suite': list_record['suite'],
        'mapsco': list_record['mapsco']
    }
    kwargs = {
        'title': smart_title(list_record['name'].decode('utf-8')),
        'item_date': list_record['inspection_date'],
        'location_name': list_record['address']
    }
    attributes = {
        'name': list_record['name'].decode('utf-8'),
        'inspection_type': inspection_type.id,
        'result': result.id,
        'score': list_record['score'],
        'eb_facility_id': eb_facility_id,
        'json': json.dumps(json_data)
    }
    if old_record is None:
        self.create_newsitem(attributes, **kwargs)
    else:
        self.update_existing(old_record, kwargs, attributes)
def save(self, old_record, list_record, detail_record):
    """Create a news item for a certificate record not seen before.

    When ``old_record`` is not None a debug message is logged and nothing
    is saved. ``detail_record`` is not used.
    """
    # The raw record used to be dumped to stdout with a leftover debug
    # ``print``; it now goes through the logger like the other diagnostics.
    self.logger.debug('%s', list_record)
    if old_record is not None:
        self.logger.debug('Record already exists')
        return

    project_type = self.get_or_create_lookup(
        'project_type', list_record['project_type'],
        list_record['project_type'], make_text_slug=False)
    title = 'Certificate issued for %s' % project_type.name
    attributes = {
        'parcel_id': list_record['PID__Parcel_ID_'],
        'permit_number': list_record['Permit_Number'],
        'cost': list_record['Cost'],
        'project_type': project_type.id,
        'project_type_raw': list_record['USDC_Code'],
        'num_units': list_record['NumberOfUnits'],
        'heated_sqft': list_record['Heated_Square_Feet']
    }
    self.create_newsitem(
        attributes,
        title=title,
        item_date=list_record['CO_Date'],
        location_name=smart_title(list_record['Project_Address']),
    )
def save(self, old_record, list_record, detail_record):
    """Create a news item for a bulletin that has not been seen before.

    Nothing is saved when ``old_record`` is not None or when the list
    record's 'location' equals the blank placeholder string.
    """
    # NOTE(review): the placeholder literal below is copied from the
    # original comparison; confirm it matches the scraped value exactly.
    if old_record is not None or list_record['location'] == ' ':
        return

    raw_type = list_record['bulletin_type']
    bulletin_type_lookup = self.get_or_create_lookup(
        'bulletin_type', capfirst(raw_type.lower()), raw_type.upper(),
        make_text_slug=False)

    zone_lookups = [
        self.get_or_create_lookup('zone', capfirst(zone.lower()), zone, make_text_slug=False)
        for zone in detail_record['zone']
    ]

    attributes = {
        'project_number': list_record['project_number'],
        'description': detail_record.get('description', None),
        'bulletin_type': bulletin_type_lookup.id,
        'application_date': detail_record['application_date'],
        'complete_date': detail_record['complete_date'],
        'zone': ','.join([str(lookup.id) for lookup in zone_lookups]),
        'bid': list_record['bid'],
        'nid': list_record['nid'],
    }
    notice_url = 'http://web1.seattle.gov/dpd/luib/Notice.aspx?BID=%s&NID=%s'
    self.create_newsitem(
        attributes,
        title=bulletin_type_lookup.name,
        url=notice_url % (list_record['bid'], list_record['nid']),
        item_date=list_record['bulletin_date'],
        location_name=smart_title(list_record['location'])
    )
def clean_list_record(self, record):
    """Parse the activity date and tidy the name and address in place.

    Returns the same ``record`` object. Raises SkipRecord when the record's
    upper-cased, stripped 'county' is not in ``self.counties``.
    """
    county = record['county'].upper().strip()
    if county not in self.counties:
        raise SkipRecord('Record not in %s.' % self.counties)

    record['activity_date'] = parse_date(record['activity_date'], '%m/%d/%Y')
    record['dba'] = smart_title(record['dba'])
    record['address'] = clean_address(record['address'])
    return record
def save(self, old_record, list_record, detail_record):
    """Create a news item for a permit record not seen before.

    Nothing is saved, and a debug message is logged, when 'Issue_Date' is
    not a date or when ``old_record`` is not None. ``detail_record`` is not
    used.
    """
    issue_date = list_record['Issue_Date']
    if not isinstance(issue_date, datetime.date):
        self.logger.debug("Did not save %s. Invalid date %s." % (list_record['ExternalFileNum'], list_record['Issue_Date']))
        return
    if old_record is not None:
        self.logger.debug('Record already exists')
        return

    def lookup(schema_field, source_key):
        # The same raw value serves as both the lookup's name and its code.
        return self.get_or_create_lookup(
            schema_field, list_record[source_key], list_record[source_key],
            make_text_slug=False)

    permit_type = lookup('permit_type', 'PermitType')
    project_type = lookup('project_type', 'USDCCodeNumber')
    occupancy_type = lookup('occupancy_type', 'occupancy_type')
    construction_type = lookup('construction_type', 'ConstructionType')

    headline = 'Permit issued for %s' % project_type.name
    attributes = {
        'permit_number': list_record['ExternalFileNum'],
        'cost': list_record['Construction_Cost'],
        'permit_type': permit_type.id,
        'project_type': project_type.id,
        'project_number': list_record['ProjectNumber'],
        'owner': list_record['OwnerTenant'],
        'occupancy_type': occupancy_type.id,
        'occupancy_type_raw': list_record['Occupancy'],
        'number_of_stories': list_record['NumberOfStories'],
        'construction_type': construction_type.id,
        'total_fee': list_record['TotalFee'],
    }
    self.create_newsitem(
        attributes,
        title=headline,
        item_date=list_record['Issue_Date'],
        location_name=smart_title(list_record['Address']),
    )
def clean_list_record(self, record):
    """Strip trailing notes from the text fields and clean the record in place.

    Every note removed from 'name', 'business_type' or 'location' is
    collected in ``record['notes']``. The location is title-cased and the
    date parsed. Returns the same ``record`` object.

    Raises SkipRecord when the (name, location) pair is listed in
    BUSINESS_NAMES_TO_IGNORE or when the cleaned location is empty.
    """
    notes_pats = [
        r'(?P<value>.*?)\s*\-*\s*(?P<notes>\(?\s*w\/d.*)',
        r'(?P<value>.*?)\s*\-*\s*(?P<notes>\(?\s*withd.*)',
        r'(?P<value>.*?)\s*\-*\s*(?P<notes>\(?\s*ch\s+227\s+sec\s*5A.*)',
        r'(?P<value>.*?)\s*\-*\s*(?P<notes>\(?\s*ch\s+bus\s+.*)',
        r'(?P<value>.*?)\s*\-*\s*(?P<notes>\(?\s*c\/l.*)',
    ]
    collected = []

    # strip notes off of several cruft-prone fields
    for field in ['name', 'business_type', 'location']:
        text = record.get(field, '').strip()
        for pattern in notes_pats:
            found = re.match(pattern, text, re.I | re.M)
            if found is None:
                continue
            # Later patterns are applied to what is left of the value.
            text = found.group('value')
            collected.append(found.group('notes'))
        record[field] = text.strip()

    record['notes'] = collected
    record['location'] = smart_title(record['location'].strip())
    record['date'] = parse_date(record['date'].strip(), '%Y-%m-%d')

    ignore_key = (record['name'].upper(), record['location'].upper())
    if ignore_key in BUSINESS_NAMES_TO_IGNORE:
        raise SkipRecord('Skipping %s (explicitly ignored)' % record['name'])
    if record['location'] == '':
        raise SkipRecord('Skipping %s (no location)' % record['name'])
    return record
def clean_list_record(self, record):
    """Add cleaned date, time, address, permit type and description fields.

    'issdttm' is parsed into 'issue_date' and 'issue_time'. 'description'
    comes from 'compute_0009', falling back to 'permit_description' when
    that key is missing. Returns the same ``record`` object.
    """
    when_issued = parse_date(
        record['issdttm'], '%m/%d/%Y %H:%M:%S', return_datetime=True)
    record['issue_date'] = when_issued.date()
    record['issue_time'] = when_issued.time()

    record['clean_address'] = '%s %s. %s %s.' % (
        record['stno'],
        record['predir'],
        smart_title(record['stname']),
        smart_title(record['suffix']),
    )

    # Drop the leading "PERMIT - " marker before title-casing.
    without_prefix = re.sub(r'^PERMIT - ', '', record['apdesc'])
    record['clean_permit_type'] = smart_title(without_prefix)

    try:
        description = record['compute_0009']
    except KeyError:
        description = record['permit_description']
    record['description'] = description
    return record
def save(self, old_record, list_record, detail_record):
    """Create or update the news item for one crime record.

    A new item, with a location point, is created when ``old_record`` is
    None; otherwise the existing item's title, date, location name and
    attributes are updated. ``detail_record`` is not used.
    """
    def lookup_id(field):
        # The same raw value serves as both the lookup's name and its code.
        return self.get_or_create_lookup(field, list_record[field], list_record[field]).id

    secondary_type = self.get_or_create_lookup(
        'secondary_type',
        list_record['secondary_type'],
        list_record['secondary_type_id'])

    real_address = '%s %s %s' % (
        list_record['addr_block'],
        list_record['addr_direction'],
        list_record['addr_street'])
    # The public location name replaces the last two characters of the
    # block number with "00".
    block_address = '%s00 block %s. %s' % (
        list_record['addr_block'][:-2],
        list_record['addr_direction'],
        smart_title(list_record['addr_street']))

    raw_point = Point(list_record['x_coord'], list_record['y_coord'], srid=102671)
    crime_location = self.safe_location(real_address, raw_point, 375)

    new_attributes = {
        'is_outdated': False,
        'case_number': list_record['case_number'],
        'crime_time': list_record['crime_time'],
        'primary_type': lookup_id('primary_type'),
        'secondary_type': secondary_type.id,
        'place': lookup_id('place'),
        'beat': lookup_id('beat'),
        'domestic': list_record['domestic'],
        'xy': '%s;%s' % (list_record['x_coord'], list_record['y_coord']),
        'real_address': real_address,
    }

    if old_record is not None:
        # This crime already exists in our database, but check whether any
        # of the values have changed.
        new_values = {
            'title': secondary_type.name,
            'item_date': list_record['crime_date'],
            'location_name': block_address
        }
        self.update_existing(old_record, new_values, new_attributes)
        return

    self.create_newsitem(
        new_attributes,
        title=secondary_type.name,
        url='http://gis.chicagopolice.org/',
        item_date=list_record['crime_date'],
        location=crime_location,
        location_name=block_address,
    )
def save(self, old_record, list_record, detail_record):
    """Create one news item per inspection record of a facility.

    ``detail_record`` is iterated as a sequence of inspection records. The
    whole call returns None as soon as an inspection is found that already
    has an item for this facility on the same date. ``old_record`` is not
    used.
    """
    for inspection in detail_record:
        # Since parse_detail emits more than one record, we check for existing
        # records here rather than in self.existing_record()
        try:
            same_day = NewsItem.objects.filter(schema__id=self.schema.id,
                                               item_date=inspection['inspection_date'])
            # Indexing raises IndexError when there is no matching item.
            same_day.by_attribute(self.schema_fields['facility_id'], list_record['facid'])[0]
        except IndexError:
            pass
        else:
            return None

        inspection_type_lookup = self.get_or_create_lookup(
            'inspection_type', inspection['inspection_type'],
            inspection['inspection_type'], make_text_slug=False)
        violations_lookups = [
            self.get_or_create_lookup('violations', text, text, make_text_slug=False)
            for text in inspection['violations']
        ]
        attributes = {
            'name': list_record['name'],
            'inspection_type': inspection_type_lookup.id,
            'violations': ','.join([str(lookup.id) for lookup in violations_lookups]),
            'facility_id': list_record['facid'],
        }
        self.create_newsitem(
            attributes,
            title=smart_title(list_record['name']),
            url=self.detail_uri % list_record['facid'],
            item_date=inspection['inspection_date'],
            location_name=clean_address(list_record['location'])
        )
def convert_to_json():
    """Rewrite each excavation permit's "location_details" attribute as JSON.

    The stored value is split on "___" into a comma-separated CNN list and a
    ";"-separated list of details, each repaired with repair_details(). The
    item's location name and description are rebuilt from those details and
    the item is saved. Progress is printed once per batch.
    """
    news_items = NewsItem.objects.filter(schema__slug="excavation-permits").order_by("id")
    for start, end, total, qs in queryset.batch(news_items):
        # Parenthesised single-argument form works as a Python 2 print
        # statement and as a Python 3 function call.
        print("processing %s to %s of %s" % (start + 1, end, total))
        for ni in qs:
            cnn_list, details = ni.attributes["location_details"].split("___")
            details = [repair_details(d) for d in details.split(";")]
            location_details = {"cnn_list": cnn_list.split(","), "details": details}
            ni.attributes["location_details"] = simplejson.dumps(location_details)

            # Collect each distinct street once; intersections contribute
            # their first cross street.
            streets = set()
            for detail in details:
                # ``in`` replaces dict.has_key(), removed in Python 3.
                if "street_name" in detail:
                    streets.add(detail["street_name"])
                else:
                    streets.add(detail["cross_street_1"])
            location_name = ", ".join([smart_title(street) for street in streets])
            if len(location_name) > 150:
                location_name = VARIOUS

            description = "; ".join([verbose_detail(loc) for loc in location_details["details"]])
            ni.location_name = location_name
            ni.description = description
            ni.save()
def convert_to_json():
    """Rewrite each excavation permit's 'location_details' attribute as JSON.

    The stored value is split on '___' into a comma-separated CNN list and a
    ';'-separated list of details, each repaired with repair_details(). The
    item's location name and description are rebuilt from those details and
    the item is saved. Progress is printed once per batch.
    """
    news_items = NewsItem.objects.filter(
        schema__slug='excavation-permits').order_by('id')
    for start, end, total, qs in queryset.batch(news_items):
        # Parenthesised single-argument form works as a Python 2 print
        # statement and as a Python 3 function call.
        print("processing %s to %s of %s" % (start + 1, end, total))
        for ni in qs:
            cnn_list, details = ni.attributes['location_details'].split('___')
            details = [repair_details(d) for d in details.split(';')]
            location_details = {
                'cnn_list': cnn_list.split(','),
                'details': details,
            }
            ni.attributes['location_details'] = simplejson.dumps(
                location_details)

            # Collect each distinct street once; intersections contribute
            # their first cross street.
            streets = set()
            for detail in details:
                # ``in`` replaces dict.has_key(), removed in Python 3.
                if 'street_name' in detail:
                    streets.add(detail['street_name'])
                else:
                    streets.add(detail['cross_street_1'])
            location_name = ', '.join(
                [smart_title(street) for street in streets])
            if len(location_name) > 150:
                location_name = VARIOUS

            description = '; '.join(
                [verbose_detail(loc) for loc in location_details['details']])
            ni.location_name = location_name
            ni.description = description
            ni.save()
def save(self, old_record, list_record, detail_record):
    """Create a news item for a bulletin that has not been seen before.

    Nothing is saved when ``old_record`` is not None or when the list
    record's 'location' equals the blank placeholder string.
    """
    if old_record is not None:
        return
    # NOTE(review): the placeholder literal below is copied from the
    # original comparison; confirm it matches the scraped value exactly.
    if list_record['location'] == ' ':
        return

    bulletin_type = list_record['bulletin_type']
    bulletin_type_lookup = self.get_or_create_lookup(
        'bulletin_type',
        capfirst(bulletin_type.lower()),
        bulletin_type.upper(),
        make_text_slug=False)

    zone_ids = []
    for zone in detail_record['zone']:
        zone_lookup = self.get_or_create_lookup(
            'zone', capfirst(zone.lower()), zone, make_text_slug=False)
        zone_ids.append(zone_lookup)

    attributes = {
        'project_number': list_record['project_number'],
        'description': detail_record.get('description', None),
        'bulletin_type': bulletin_type_lookup.id,
        'application_date': detail_record['application_date'],
        'complete_date': detail_record['complete_date'],
        'zone': ','.join([str(lookup.id) for lookup in zone_ids]),
        'bid': list_record['bid'],
        'nid': list_record['nid'],
    }
    self.create_newsitem(
        attributes,
        title=bulletin_type_lookup.name,
        url='http://web1.seattle.gov/dpd/luib/Notice.aspx?BID=%s&NID=%s' % (
            list_record['bid'], list_record['nid']),
        item_date=list_record['bulletin_date'],
        location_name=smart_title(list_record['location']))
def clean_list_record(self, record):
    """Parse the permit date and clean the description and address in place.

    Line breaks in 'description' are collapsed to single spaces before it
    is decoded from ISO-8859-1. Adds 'clean_address' and returns the same
    ``record`` object.
    """
    record['permit_date'] = parse_date(record['permit_date'], '%m/%d/%Y')

    single_line = re.sub(r'[\r\n]+', ' ', record['description'])
    record['description'] = single_line.strip()
    # Avoid database-level encoding errors
    record['description'] = record['description'].decode('iso-8859-1')

    record['clean_address'] = smart_title(record['address'])
    return record
def clean_list_record(self, record):
    """Add cleaned street names and parsed dates to a permit record in place.

    Adds 'clean_street_name', 'clean_cross_1' and 'clean_cross_2', rewrites
    'Permit Reason' with only its first letter capitalized, and parses the
    'Effective Date' and 'Expiration Date' values. Returns the same
    ``record`` object.
    """
    record['clean_street_name'] = smart_title(
        remove_leading_zero(record['streetname']))
    # The backslash is escaped explicitly: a bare ' \ ' relies on an invalid
    # escape sequence, which newer Pythons warn about.
    record['clean_cross_1'] = smart_title(
        remove_leading_zero(record['Cross Street 1'].replace(' \\ ', ' / ')))
    record['clean_cross_2'] = smart_title(
        remove_leading_zero(record['Cross Street 2'].replace(' \\ ', ' / ')))
    record['Permit Reason'] = capfirst(
        record['Permit Reason'].lower()).replace('Cut off service',
                                                 'Cut-off service')
    record['Effective Date'] = parse_date(record['Effective Date'],
                                          '%Y-%m-%d %H:%M:%S')
    record['Expiration Date'] = parse_date(record['Expiration Date'],
                                           '%Y-%m-%d %H:%M:%S')
    return record
def clean_list_record(self, record):
    """Strip the text fields, title-case the location and parse the date.

    Returns the same ``record`` object. Raises SkipRecord when the
    upper-cased (name, location) pair is in BUSINESS_NAMES_TO_IGNORE.
    """
    for field in ("name", "business_type"):
        record[field] = record[field].strip()
    record["location"] = smart_title(record["location"].strip())
    record["date"] = parse_date(record["date"].strip(), "%Y-%m-%d")

    ignore_key = (record["name"].upper(), record["location"].upper())
    if ignore_key in BUSINESS_NAMES_TO_IGNORE:
        raise SkipRecord("Skipping %s" % record["name"])
    return record
def clean_list_record(self, record):
    """Strip the text fields, title-case the location and parse the date.

    Returns the same ``record`` object. Raises SkipRecord when the
    upper-cased (name, location) pair is in BUSINESS_NAMES_TO_IGNORE.
    """
    name = record['name'].strip()
    record['name'] = name
    record['business_type'] = record['business_type'].strip()
    location = smart_title(record['location'].strip())
    record['location'] = location
    record['date'] = parse_date(record['date'].strip(), '%Y-%m-%d')

    if (name.upper(), location.upper()) not in BUSINESS_NAMES_TO_IGNORE:
        return record
    raise SkipRecord('Skipping %s' % record['name'])
def save(self, old_record, list_record, detail_record):
    """Create or update the news item for one offense record.

    Six lookups are fetched or created from the list record, and their ids
    are stored next to the raw offense fields.  A new news item is created
    when ``old_record`` is None; otherwise the existing one is updated via
    ``self.update_existing``.
    """
    # (attribute/lookup name, list_record key), in the order they are
    # looked up.
    lookup_sources = (
        ("category", "category"),
        ("secondary_category", "secondary_category"),
        ("beat", "offensebeat"),
        ("premises", "offensepremises"),
        ("crime_type", "crime_type"),
        ("secondary_crime_type", "secondary_crime_type"),
    )
    lookups = {}
    for field, source_key in lookup_sources:
        lookups[field] = self.get_or_create_lookup(
            field, list_record[source_key], list_record[source_key],
            make_text_slug=False)

    kwargs = {
        "title": smart_title(list_record["offensedescription"]),
        "item_date": list_record["offensedate"],
        "location_name": list_record["address"],
    }

    street_parts = ("offenseblock", "offensedirection", "offensestreet")
    ucr_parts = ("offenseucr1", "offenseucr2")
    attributes = {
        "service_number": list_record["offenseservicenumber"],
        "offense_time": list_record["offensestarttime"],
        "description": list_record["offensedescription"],
        "method": list_record["offensemethodofoffense"],
        # street is block;direction;street
        # This will allow us to reprocess the original data when we
        # improve the address normalizer.
        "street": ";".join(list_record[part] or "" for part in street_parts),
        "ucr": ";".join(list_record[part] or "" for part in ucr_parts),
    }
    for field, lookup in lookups.items():
        attributes[field] = lookup.id

    if old_record is not None:
        self.update_existing(old_record, kwargs, attributes)
    else:
        self.create_newsitem(attributes, **kwargs)
def save(self, old_record, list_record, detail_record):
    """Create a news item for an inspection that has not been saved yet.

    The record is skipped (with a log message) when it already exists,
    when it has no location, or when its ``RESULT`` is not one of the
    values a headline is defined for.  Otherwise lookups are fetched or
    created for the project type, inspection type, result and each
    non-empty defect (``DEFECT1``..``DEFECT9`` with ``Defect<n>_Code``),
    and the news item is created.
    """
    if old_record is not None:
        self.logger.debug('Record already exists')
        return
    if list_record['location'] is None:
        self.logger.debug('Skipping %s. No address found.' %
                          list_record['PERMIT_NUMBER'])
        return

    # Headline per known result.  Resolved before any lookup is created:
    # the previous if/elif chain left ``title`` unbound for any other
    # result and crashed with UnboundLocalError at create_newsitem().
    # NOTE(review): '03 - Inaccessible' is worded as a conditional pass --
    # confirm that is intended.
    title_templates = {
        '01 - Passed': "Project passed inspection at %s",
        '02 - Failed': "Project failed inspection at %s",
        '03 - Inaccessible': "Project conditionally passed inspection at %s",
        'Not Done': "Project was not inspected at %s",
    }
    title_template = title_templates.get(list_record['RESULT'])
    if title_template is None:
        self.logger.warning('Skipping %s. Unrecognized result %r.',
                            list_record['PERMIT_NUMBER'],
                            list_record['RESULT'])
        return
    title = title_template % list_record['location']

    project_type = self.get_or_create_lookup(
        'project_type', list_record['USDC_Activity_Type'],
        list_record['USDC_Activity_Type'], make_text_slug=False)
    inspection_type = self.get_or_create_lookup(
        'inspection_type', list_record['TASKPERFORMED'],
        list_record['TASKPERFORMED'], make_text_slug=False)
    result = self.get_or_create_lookup(
        'result', list_record['RESULT'], list_record['RESULT'],
        make_text_slug=False)

    detail_lookups = []
    for i in range(1, 10):
        code = list_record['Defect%s_Code' % i].strip()
        name = list_record['DEFECT%s' % i].strip()
        # Only defect slots with both a name and a code are recorded.
        if name and code:
            detail_lookups.append(self.get_or_create_lookup(
                'details', name, code, make_text_slug=False))

    attributes = {
        'contractor_id': list_record['CONTRACTORID'],
        'contractor': list_record['CONTRACTOR'],
        'project_type': project_type.id,
        'permit_number': list_record['PERMIT_NUMBER'],
        'inspection_type': inspection_type.id,
        'result': result.id,
        # Multiple defect lookups are stored as a comma-separated id list.
        'details': ','.join(str(d.id) for d in detail_lookups),
    }
    self.create_newsitem(
        attributes,
        title=title,
        item_date=list_record['InspectionDate'],
        location_name=smart_title(list_record['location']),
    )
def clean_list_record(self, record):
    """Normalize a raw street-use permit record in place and return it.

    * ``Approved Date`` is parsed as ``%Y-%m-%d %H:%M:%S``.
    * ``clean_location``, ``clean_cross_1`` and ``clean_cross_2`` get
      title-cased values with leading zeros removed.
    * ``Agent`` is stripped, defaulting to ``'Unknown agent'`` when empty.
    * ``Permit Type`` defaults to ``'N/A'`` and is mapped through
      STREET_USE_PERMIT_TYPES when it has an entry there.
    """
    def tidy_cross_street(value):
        # Show a backslash separator as a forward slash.  The backslash is
        # escaped explicitly: ' \ ' only worked because "\ " is not a
        # recognised escape sequence, which newer Pythons warn about.
        return smart_title(remove_leading_zero(value.replace(' \\ ', ' / ')))

    record['Approved Date'] = parse_date(record['Approved Date'],
                                         '%Y-%m-%d %H:%M:%S')
    record['clean_location'] = smart_title(
        remove_leading_zero(record['Location'])).strip()
    record['clean_cross_1'] = tidy_cross_street(record['Cross Street 1'])
    record['clean_cross_2'] = tidy_cross_street(record['Cross Street 2'])

    # A missing, empty or whitespace-only agent all become the placeholder.
    record['Agent'] = (record['Agent'] or '').strip() or 'Unknown agent'

    record['Permit Type'] = record['Permit Type'] or 'N/A'
    try:
        record['Permit Type'] = STREET_USE_PERMIT_TYPES[record['Permit Type']]
    except KeyError:
        # Types without a mapping are kept as they arrived.
        pass
    return record
def save(self, old_record, list_record, detail_record):
    """Create or update the news item for one facility action record.

    Records whose ``TYPE_ACT`` is a visit, status change or follow-up are
    ignored.  For the rest, lookups are fetched or created within the
    record's schema, each violation becomes a ``violation`` lookup, and the
    violation details are stored as JSON.  The item is created when
    ``old_record`` is None and updated otherwise.
    """
    ignored_actions = ('Visit', 'CV Visit', 'Status Change', 'CV Follow-Up')
    if list_record['TYPE_ACT'] in ignored_actions:
        return

    schema_slug = list_record['schema_slug']
    schema = self.schemas[schema_slug]

    def unslugged_lookup(field, source_key):
        # Lookup keyed on the raw list_record value, without a text slug.
        return self.get_or_create_lookup(
            field, list_record[source_key], list_record[source_key],
            schema=schema_slug, make_text_slug=False)

    result_lookup = self.get_or_create_lookup(
        'result', list_record['result'], list_record['result'],
        schema=schema_slug)
    facility_type_lookup = unslugged_lookup('facility_type', 'facility_type')
    facility_status_lookup = unslugged_lookup('facility_status', 'ACT_PSC')
    classification_lookup = unslugged_lookup('classification', 'CLASSIFICATION')
    action_type_lookup = unslugged_lookup('action_type', 'TYPE_ACT')

    # For the food schema, violation ids are prefixed according to whether
    # the record is dated on/after 2008-07-01; other schemas use no prefix.
    prefix = ''
    if schema_slug == FOOD_SLUG:
        if list_record['DATE'] >= datetime.date(2008, 7, 1):
            prefix = '2.'
        else:
            prefix = '1.'

    violation_ids = []
    violation_details = []
    for violation in list_record['violation']:
        code = prefix + violation['id']
        lookup = self.get_or_create_lookup(
            'violation', code, code,
            schema=schema_slug, make_text_slug=False)
        violation_ids.append(str(lookup.id))
        violation_details.append({
            'lookup_id': lookup.id,
            'value': violation['value'],
            'comment': violation['comment'],
        })
    violations_json = DjangoJSONEncoder().encode(violation_details)

    facility_name = list_record['FAC_NAME']
    address = ' '.join([list_record['ADDR1'], list_record['ADDR2']])

    attributes = {
        'name': list_record['FAC_NAME'],
        'facility_type': facility_type_lookup.id,
        'facility_status': facility_status_lookup.id,
        'raw_score': list_record['RAW_SCORE'],
        'final_score': list_record['FIN_SCORE'],
        'result': result_lookup.id,
        'classification': classification_lookup.id,
        'facility_id': list_record['FAC_ID'],
        'violation': ','.join(violation_ids),
        'violation_detail': violations_json,
        'action_type': action_type_lookup.id,
    }
    values = {
        'schema': schema,
        'title': facility_name,
        'item_date': list_record['DATE'],
        'location_name': smart_title(address),
    }

    if old_record is not None:
        self.update_existing(old_record, values, attributes)
    else:
        self.create_newsitem(attributes, **values)
def clean_list_record(self, record):
    """Derive the normalized fields for a sign-permit record and return it.

    Adds ``address``, the ``is_*`` flags, ``illumination_type``,
    ``sign_text``, ``sign_for``, ``is_near_highway`` and
    ``is_changeable_copy`` to ``record``.

    Raises SkipRecord when ``Latest Action Date`` is not a datetime, when
    the sign text is longer than 255 characters, or when the sign text is
    neither a string nor convertible with ``int()``.
    """
    record['address'] = '%s %s, %s' % (
        record['House #'],
        smart_title(record['Street Name']),
        smart_title(record['Borough']),
    )

    yes_no_columns = (
        ('is_landmark', 'Landmark'),
        ('is_adult_establishment', 'Adult Estab'),
        ('is_city_owned', 'City Owned'),
    )
    for target, column in yes_no_columns:
        record[target] = record[column] == 'Y'

    illumination = record.get('Sign Illumination Type', 'Not available')
    record['illumination_type'] = illumination or 'Not illuminated'

    latest_action = record['Latest Action Date']
    if not isinstance(latest_action, datetime.datetime):
        self.logger.info('Skipping job #%s, with latest action date %s',
                         record.get('Job #'), latest_action)
        raise SkipRecord()

    raw_text = record['Text on Sign']
    try:
        record['sign_text'] = raw_text.strip()
        if len(record['sign_text']) > 255:
            # Some records are malformed and have a bad and long value
            # for sign text.
            self.logger.info('Skipping job #%s, with Text on Sign %s',
                             record.get('Job #'), raw_text)
            raise SkipRecord()
    except AttributeError:
        # The value has no strip(), i.e. it is not a string; fall back to
        # its integer rendering.
        try:
            record['sign_text'] = str(int(raw_text))
        except TypeError:
            self.logger.info('Skipping job #%s, with Text on Sign %s',
                             record.get('Job #'), raw_text)
            raise SkipRecord()

    try:
        sign_for = record['Sign Advertising']
    except KeyError:
        sign_for = record['Usage']
    record['sign_for'] = sign_for

    try:
        near_highway = record['Sign Near Highway'] == 'Y'
    except KeyError:
        # Older spreadsheets don't have a 'Sign Near Highway' column,
        # and there's nothing we can do about it. They have a column called
        # 'Adjacent to Arterial Highway', but that's not necessarily the
        # same thing.
        near_highway = None
    record['is_near_highway'] = near_highway

    try:
        changeable = record['Sign Changeable Copy'] == 'Y'
    except KeyError:
        # Older spreadsheets don't have a 'Sign Changeable Copy' column,
        # but we can deduce the value: if there's text, then it's not
        # changeable. Otherwise, it's NULL.
        if record['sign_text']:
            changeable = False
        else:
            changeable = None
    record['is_changeable_copy'] = changeable

    return record
def save(self, old_record, list_record, detail_record):
    """Create or update the news item for one crime record.

    The exact address is kept in the ``real_address`` attribute, while the
    displayed location name is reduced to a "<N>00 block" form.  The item
    is created when ``old_record`` is None; otherwise the existing item is
    passed to ``self.update_existing`` with the current values.
    """
    def same_name_lookup(field):
        # Lookup whose name and code are both the list_record value.
        return self.get_or_create_lookup(
            field, list_record[field], list_record[field])

    secondary_type = self.get_or_create_lookup(
        "secondary_type",
        list_record["secondary_type"],
        list_record["secondary_type_id"],
    )

    block = list_record["addr_block"]
    direction = list_record["addr_direction"]
    street = list_record["addr_street"]
    real_address = "%s %s %s" % (block, direction, street)
    # Drop the last two characters of the block number so that only the
    # "hundreds" part is shown.
    block_address = "%s00 block %s. %s" % (
        block[:-2], direction, smart_title(street))

    raw_point = Point(list_record["x_coord"], list_record["y_coord"],
                      srid=102671)
    crime_location = self.safe_location(real_address, raw_point, 375)

    primary_type = same_name_lookup("primary_type")
    place = same_name_lookup("place")
    beat = same_name_lookup("beat")

    new_attributes = {
        "is_outdated": False,
        "case_number": list_record["case_number"],
        "crime_time": list_record["crime_time"],
        "primary_type": primary_type.id,
        "secondary_type": secondary_type.id,
        "place": place.id,
        "beat": beat.id,
        "domestic": list_record["domestic"],
        "xy": "%s;%s" % (list_record["x_coord"], list_record["y_coord"]),
        "real_address": real_address,
    }

    if old_record is not None:
        # This crime already exists in our database, but check whether any
        # of the values have changed.
        new_values = {
            "title": secondary_type.name,
            "item_date": list_record["crime_date"],
            "location_name": block_address,
        }
        self.update_existing(old_record, new_values, new_attributes)
        return

    self.create_newsitem(
        new_attributes,
        title=secondary_type.name,
        url="http://gis.chicagopolice.org/",
        item_date=list_record["crime_date"],
        location=crime_location,
        location_name=block_address,
    )
def make_street_pretty_name(prefix, street, suffix):
    """
    Build a display name from a street's prefix, name and suffix.

    Any of the three parts may be None or empty.

    >>> make_street_pretty_name(None, 'whee', None)
    u'Whee'
    >>> make_street_pretty_name('oh', 'boy', None)
    u'Oh Boy'
    >>> make_street_pretty_name('', 'YES', 'nO')
    u'Yes No'
    >>> make_street_pretty_name(' US hWy ', '101', 'C')
    u'US Hwy 101 C'
    >>> make_street_pretty_name(' I- ', '40', '')
    u'I-40'
    """
    pretty_prefix = make_pretty_prefix(prefix or u'')
    pretty_suffix = smart_title(suffix or u'').strip()
    pretty_street = smart_title(street or u'').strip()
    #assert street
    if pretty_prefix == u'I':
        # Special case to avoid "I- 40", the standard is apparently "I-40"
        pretty_prefix, pretty_street = u'', u'I-%s' % pretty_street
    pieces = [pretty_prefix, pretty_street, pretty_suffix]
    return u' '.join(pieces).strip()
def save(self, old_record, list_record, detail_record):
    """Create a news item for a permit that has not been saved yet.

    Records whose ``Issue_Date`` is not a ``datetime.date`` and records
    that already exist are skipped with a debug message.  Otherwise four
    lookups are fetched or created and the item is created with a
    "Permit issued for <project type>" title.
    """
    issue_date = list_record['Issue_Date']
    if not isinstance(issue_date, datetime.date):
        self.logger.debug(
            "Did not save %s. Invalid date %s." %
            (list_record['ExternalFileNum'], issue_date))
        return
    if old_record is not None:
        self.logger.debug('Record already exists')
        return

    def raw_lookup(field, source_key):
        # Lookup keyed on the raw list_record value, without a text slug.
        return self.get_or_create_lookup(
            field, list_record[source_key], list_record[source_key],
            make_text_slug=False)

    permit_type = raw_lookup('permit_type', 'PermitType')
    project_type = raw_lookup('project_type', 'USDCCodeNumber')
    occupancy_type = raw_lookup('occupancy_type', 'occupancy_type')
    construction_type = raw_lookup('construction_type', 'ConstructionType')

    attributes = {
        'permit_number': list_record['ExternalFileNum'],
        'cost': list_record['Construction_Cost'],
        'permit_type': permit_type.id,
        'project_type': project_type.id,
        'project_number': list_record['ProjectNumber'],
        'owner': list_record['OwnerTenant'],
        'occupancy_type': occupancy_type.id,
        'occupancy_type_raw': list_record['Occupancy'],
        'number_of_stories': list_record['NumberOfStories'],
        'construction_type': construction_type.id,
        'total_fee': list_record['TotalFee'],
    }
    self.create_newsitem(
        attributes,
        title='Permit issued for %s' % project_type.name,
        item_date=issue_date,
        location_name=smart_title(list_record['Address']),
    )
def clean_list_record(self, record):
    """Normalize a raw call record in place and return it.

    * ``raw_address`` keeps the entity-decoded, title-cased address, while
      ``address`` is reduced through ``clean_address`` and
      ``address_to_block``.
    * ``disposition`` and ``event`` are entity-decoded and stripped; an
      empty disposition becomes ``'Not available'``.
    * ``datetime`` (``%m/%d/%Y %I:%M:%S %p``) is split into ``item_date``
      and ``item_time``.
    """
    def decode_entities(text):
        # The previous code called .replace('&', '&').replace(' ', ' '),
        # which replaces each string with itself and so did nothing.
        # NOTE(review): presumably the HTML entities were lost from the
        # literals at some point; restored here -- confirm against the
        # raw feed.
        return text.replace('&amp;', '&').replace('&nbsp;', ' ')

    # Save the raw address so we can use it to find duplicate records in
    # the future.
    address = smart_title(decode_entities(record['address'].strip())).strip()
    record['raw_address'] = address
    record['address'] = address_to_block(clean_address(address))

    record['disposition'] = (decode_entities(record['disposition']).strip()
                             or 'Not available')
    record['event'] = decode_entities(record['event']).strip()

    item_date = parse_date(record['datetime'], '%m/%d/%Y %I:%M:%S %p',
                           return_datetime=True)
    record['item_date'] = item_date.date()
    record['item_time'] = item_date.time()

    # Normalize this value.
    if record['disposition'] == 'CANCCOMM':
        record['disposition'] = 'CANCELLED BY COMMUNICATIONS'
    return record
def clean_list_record(self, record):
    """Normalize a raw dispatch record in place and return it.

    Splits ``DISPATCH_DATE_TIME`` into ``dispatch_date`` and
    ``dispatch_time``, title-cases ``LOCATION``, derives ``primary_type``
    from ``UCR_TEXT`` and converts ``X_COORD`` / ``Y_COORD`` to floats.
    """
    dispatched = parse_date(record['DISPATCH_DATE_TIME'],
                            '%m/%d/%Y %I:%M:%S %p',
                            return_datetime=True)
    record['dispatch_date'] = dispatched.date()
    record['dispatch_time'] = dispatched.time()

    record['LOCATION'] = smart_title(record['LOCATION'])

    # Convert '531 - Burglary: Day; No Force: Prvt. Residence' to 'Burglary'.
    code_and_name = record['UCR_TEXT'].split(':')[0]
    primary_type = code_and_name.split(' - ')[1].title()
    # Clean up an inconsistency.
    if primary_type == 'Auto Theft':
        primary_type = 'Vehicle Theft'
    record['primary_type'] = primary_type

    for axis in ('X_COORD', 'Y_COORD'):
        record[axis] = float(record[axis])
    return record
def render(self, context):
    """Store a list of overlapping-location dicts in context[varname].

    The news item is resolved from the tag's first argument, which may be
    the item itself or a dictionary carrying it under ``'_item'``.  For
    each requested location type slug, at most one of the item's locations
    of that type is reported.  Always renders as an empty string.

    Raises template.TemplateSyntaxError if no NewsItem can be resolved.
    """
    resolved = self.newsitem_context_var.resolve(context)
    if isinstance(resolved, dict):
        newsitem = resolved.get('_item', None)
    else:
        newsitem = resolved

    if not is_instance_of_model(newsitem, NewsItem):
        raise template.TemplateSyntaxError(
            "The newsitem argument to 'get_locations_for_item' tag must be either a NewsItem, or a dictionary eg. as created by the template_context_for_item() function"
        )

    # TODO: cache the LocationType lookup?
    loctype_rows = LocationType.objects.filter(
        slug__in=self.loctype_slugs).values('name', 'slug')
    loctypes_by_slug = dict((row['slug'], row) for row in loctype_rows)

    item_locations = newsitem.location_set.all()
    result = []
    for slug in self.loctype_slugs:
        loctype = loctypes_by_slug.get(slug)
        if loctype is None:
            continue
        # Assume there is at most one intersecting location of each type.
        # That will probably be wrong somewhere someday...
        # eg. neighborhoods with fuzzy borders.
        matches = list(
            item_locations.filter(location_type__slug=loctype['slug'])[:1])
        if not matches:
            continue
        location = matches[0]
        result.append({
            'location_slug': location.slug,
            'location_type_slug': loctype['slug'],
            'location_type_name': smart_title(loctype['name'], ['ZIP']),
            'location_name': location.name,
        })

    context[self.varname] = result
    return u''
def make_pretty_prefix(prefix):
    """
    Return a display form of a street-name prefix.

    A trailing "Hwy" or "Rt" word (any case) is expanded to "Highway" or
    "Route"; any other prefix is stripped of surrounding whitespace and
    hyphens and title-cased, keeping "US" upper-case.

    >>> make_pretty_prefix('US Hwy')
    u'US Highway'
    >>> make_pretty_prefix('State Rt ')
    u'State Route'
    >>> make_pretty_prefix(' I- ')
    u'I'
    >>> make_pretty_prefix(' Anything Else ')
    u'Anything Else'
    """
    prefix = unicode(prefix).strip()
    upper_prefix = prefix.upper()
    for abbreviation, expansion in ((u'HWY', u'Highway'), (u'RT', u'Route')):
        if not upper_prefix.endswith(abbreviation):
            continue
        head = prefix[:-len(abbreviation)]
        # Only expand when the abbreviation is a whole word.  A plain
        # endswith() test also matched words that merely end in the same
        # letters, e.g. "Fort" came out as "FoRoute".
        if not head or head[-1].isspace():
            return head + expansion
    prefix = prefix.strip().strip('-').strip()
    prefix = smart_title(prefix, exceptions=['US'])
    return prefix
def save(self, old_record, list_record, detail_record):
    """Create one news item per inspection found in ``detail_record``.

    ``detail_record`` is iterated as a sequence of inspection records for
    the facility described by ``list_record``.  Each inspection gets an
    ``inspection_type`` lookup and one ``violations`` lookup per violation.
    """
    facility_id = list_record['facid']
    for record in detail_record:
        # Since parse_detail emits more than one record, we check for existing
        # records here rather than in self.existing_record()
        try:
            same_day_items = NewsItem.objects.filter(
                schema__id=self.schema.id,
                item_date=record['inspection_date'])
            same_day_items.by_attribute(
                self.schema_fields['facility_id'], facility_id)[0]
        except IndexError:
            # No item yet for this facility on this date; go on and save.
            pass
        else:
            # NOTE(review): this returns from save() entirely, so the
            # remaining detail records are not examined once one existing
            # item is found -- confirm that `continue` is not what was
            # intended.
            return None

        inspection_type_lookup = self.get_or_create_lookup(
            'inspection_type',
            record['inspection_type'],
            record['inspection_type'],
            make_text_slug=False)

        violations_lookups = [
            self.get_or_create_lookup('violations', violation, violation,
                                      make_text_slug=False)
            for violation in record['violations']
        ]

        attributes = {
            'name': list_record['name'],
            'inspection_type': inspection_type_lookup.id,
            'violations': ','.join(
                str(lookup.id) for lookup in violations_lookups),
            'facility_id': facility_id,
        }
        self.create_newsitem(
            attributes,
            title=smart_title(list_record['name']),
            url=self.detail_uri % facility_id,
            item_date=record['inspection_date'],
            location_name=clean_address(list_record['location']))
def make_street_pretty_name(street, suffix):
    """Return the title-cased street name, plus " <Suffix>." if one is given.

    A falsy ``suffix`` (None or empty) leaves the street name on its own.
    """
    pretty_street = smart_title(street)
    if not suffix:
        return pretty_street
    return pretty_street + u' %s.' % smart_title(suffix)