def clean_list_record(self, record):
    """Normalize one raw list record in place and return it.

    Parses the three date columns, strips the address columns, splits the
    'NARRATIVE' column into 'TIME' and 'DISPOSITION', derives a
    'location_name' from the address columns, and looks up the broad and
    detail categories for the record's 'ORIG_CRIMETYPE_NAME'.
    """
    # All three date columns use the same format.
    for date_key in ('CALL_DATE', 'OFFENSE_DATE', 'REPORT_DATE'):
        record[date_key] = parse_date(record[date_key], '%Y-%m-%d')
    for text_key in ('COMMON_LOCATION', 'ADDRESS_NBR', 'ADDRESS'):
        record[text_key] = record[text_key].strip()

    # The 'NARRATIVE' field carries the time and disposition; pull them out
    # and drop the raw field from the record.
    # NOTE: re.search() returns None when the narrative does not match, in
    # which case the .groupdict() call below raises AttributeError.
    narrative_match = re.search(
        r'^Time: (?P<TIME>\d\d?:\d\d)<br>.*?<br>Disposition: (?P<DISPOSITION>.*)$',
        record.pop('NARRATIVE'))
    record.update(narrative_match.groupdict())
    record['TIME'] = parse_time(record['TIME'], '%H:%M')

    # Build location_name. The rule depends on ADDRESS_TYPE.
    kind = record['ADDRESS_TYPE']
    number = record['ADDRESS_NBR']
    street = record['ADDRESS']
    landmark = record['COMMON_LOCATION']
    if kind == 'PREMISE ADDRESS':
        location = '%s block of %s' % (number, clean_address(street))
    elif kind == 'INTERSECTION':
        if '/' in street:
            # Only the first two '/'-separated streets are used.
            cross_streets = street.split('/')
            location = '%s and %s' % (
                clean_address(cross_streets[0]),
                clean_address(cross_streets[1]))
        else:
            location = clean_address(street)
    elif kind == 'GEO-OVERRIDE':
        location = clean_address(street)
    elif kind == 'COMMON LOCATION':
        # Prefer the most specific combination of fields that is non-empty.
        if number and street:
            location = '%s %s' % (number, clean_address(street))
        elif street and landmark:
            location = '%s (%s)' % (
                clean_address(street), clean_address(landmark))
        elif landmark:
            location = clean_address(landmark)
        elif street:
            location = clean_address(street)
        else:
            location = 'Unknown'
    else:
        location = 'Unknown'
    record['location_name'] = location

    # Crime types without a custom mapping fall back to 'Unknown' for both
    # category levels.
    try:
        categories = CUSTOM_CATEGORIES[record['ORIG_CRIMETYPE_NAME']]
    except KeyError:
        categories = ('Unknown', 'Unknown')
    record['broad_category'], record['detail_category'] = categories
    return record
def clean_list_record(self, record):
    """Normalize one raw list record in place and return it.

    Parses the three date columns, strips the address columns, splits the
    'NARRATIVE' column into 'TIME' and 'DISPOSITION', derives a
    'location_name' from the address columns, and looks up the broad and
    detail categories for the record's 'ORIG_CRIMETYPE_NAME'.
    """
    # All three date columns use the same format.
    for date_key in ('CALL_DATE', 'OFFENSE_DATE', 'REPORT_DATE'):
        record[date_key] = parse_date(record[date_key], '%Y-%m-%d')
    for text_key in ('COMMON_LOCATION', 'ADDRESS_NBR', 'ADDRESS'):
        record[text_key] = record[text_key].strip()

    # The 'NARRATIVE' field carries the time and disposition; pull them out
    # and drop the raw field from the record.
    # NOTE: re.search() returns None when the narrative does not match, in
    # which case the .groupdict() call below raises AttributeError.
    narrative_match = re.search(r'^Time: (?P<TIME>\d\d?:\d\d)<br>.*?<br>Disposition: (?P<DISPOSITION>.*)$', record.pop('NARRATIVE'))
    record.update(narrative_match.groupdict())
    record['TIME'] = parse_time(record['TIME'], '%H:%M')

    # Build location_name. The rule depends on ADDRESS_TYPE.
    kind = record['ADDRESS_TYPE']
    number = record['ADDRESS_NBR']
    street = record['ADDRESS']
    landmark = record['COMMON_LOCATION']
    if kind == 'PREMISE ADDRESS':
        location = '%s block of %s' % (number, clean_address(street))
    elif kind == 'INTERSECTION':
        if '/' in street:
            # Only the first two '/'-separated streets are used.
            cross_streets = street.split('/')
            location = '%s and %s' % (
                clean_address(cross_streets[0]),
                clean_address(cross_streets[1]))
        else:
            location = clean_address(street)
    elif kind == 'GEO-OVERRIDE':
        location = clean_address(street)
    elif kind == 'COMMON LOCATION':
        # Prefer the most specific combination of fields that is non-empty.
        if number and street:
            location = '%s %s' % (number, clean_address(street))
        elif street and landmark:
            location = '%s (%s)' % (
                clean_address(street), clean_address(landmark))
        elif landmark:
            location = clean_address(landmark)
        elif street:
            location = clean_address(street)
        else:
            location = 'Unknown'
    else:
        location = 'Unknown'
    record['location_name'] = location

    # Crime types without a custom mapping fall back to 'Unknown' for both
    # category levels.
    try:
        categories = CUSTOM_CATEGORIES[record['ORIG_CRIMETYPE_NAME']]
    except KeyError:
        categories = ('Unknown', 'Unknown')
    record['broad_category'], record['detail_category'] = categories
    return record
def clean_list_record(self, record):
    """Convert the record's raw string fields to typed values, in place.

    'crime_date' and 'crime_time' are parsed with parse_date / parse_time,
    and 'lon' / 'lat' are converted to floats. Returns the same record.
    """
    crime_date = parse_date(record['crime_date'], '%m-%d-%Y')
    record['crime_date'] = crime_date
    crime_time = parse_time(record['crime_time'], '%I:%M:%S %p')
    record['crime_time'] = crime_time
    # Longitude is converted before latitude, as in the original ordering.
    for coord_key in ('lon', 'lat'):
        record[coord_key] = float(record[coord_key])
    return record
def _item_create(info):
    """Create and save a news item from a GeoJSON Feature dict.

    ``info`` is deep-copied, so the caller's dict is not modified.

    The 'type', 'title', 'description', 'url', 'pub_date', 'item_date' and
    'location_name' properties are fed to NewsItemForm; every remaining
    property is treated as a schema attribute and converted according to
    its SchemaField before being assigned to ``item.attributes``.

    Returns the saved item.

    Raises InvalidNewsItem if ``info`` is not a GeoJSON Feature, if the
    schema slug does not exist, or if the form does not validate.
    """
    info = copy.deepcopy(info)
    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would silently disable this validation.
    try:
        feature_type = info.pop('type')
        props = info['properties']
    except KeyError:
        raise InvalidNewsItem({'type': 'not a valid GeoJSON Feature'})
    if feature_type != 'Feature':
        raise InvalidNewsItem({'type': 'not a valid GeoJSON Feature'})

    try:
        slug = props.pop('type', None)
        schema = models.Schema.objects.get(slug=slug)
    except (models.Schema.DoesNotExist):
        raise InvalidNewsItem({'type': 'schema %r does not exist' % slug})

    data = {'schema': schema.id}
    for key in ('title', 'description', 'url'):
        data[key] = props.pop(key, '')

    # If there are errors parsing the dates, keep the raw data and let
    # the ModelForm sort it out.
    pub_date = props.pop('pub_date', None)
    if pub_date:
        try:
            data['pub_date'] = normalize_datetime(pyrfc3339.parse(pub_date))
        except Exception:
            data['pub_date'] = pub_date
    else:
        data['pub_date'] = normalize_datetime(datetime.datetime.utcnow())

    item_date = props.pop('item_date', None)
    if item_date:
        try:
            data['item_date'] = parse_date(item_date, '%Y-%m-%d', False)
        except Exception:
            data['item_date'] = item_date
    else:
        # Fall back to the date part of pub_date; pub_date may still be a
        # raw unparsed value here, in which case .date() fails.
        try:
            data['item_date'] = data['pub_date'].date()
        except Exception:
            data['item_date'] = None

    data['location'], data['location_name'] = _get_location_info(
        info.get('geometry'), props.pop('location_name', None))
    if not data['location']:
        logger.warning("Saving NewsItem %s with no geometry", data['title'])
    if not data['location_name']:
        logger.warning(
            "Saving NewsItem %s with no location_name", data['title'])

    from ebpub.db.forms import NewsItemForm
    form = NewsItemForm(data)
    if form.is_valid():
        item = form.save()
    else:
        raise InvalidNewsItem(form.errors)

    # Everything else goes in .attributes.
    # NOTE(review): the item is already saved at this point, so an exception
    # raised below (e.g. an unknown field name or an unparseable value)
    # leaves a saved item without attributes -- confirm this is acceptable.
    attributes = {}
    for key, val in props.items():
        sf = models.SchemaField.objects.get(schema=schema, name=key)
        if sf.is_many_to_many_lookup():
            lookups = [
                models.Lookup.objects.get_or_create_lookup(sf, lookup_name)
                for lookup_name in val
            ]
            val = ','.join(str(lookup.id) for lookup in lookups)
        elif sf.is_lookup:
            val = models.Lookup.objects.get_or_create_lookup(sf, val)
        elif sf.is_type('date'):
            val = normalize_datetime(parse_date(val, '%Y-%m-%d'))
        elif sf.is_type('time'):
            val = normalize_datetime(parse_time(val, '%H:%M'))
        elif sf.is_type('datetime'):
            # Parse the attribute value itself; the previous code passed the
            # ``datetime`` module here, which could never succeed.
            val = normalize_datetime(pyrfc3339.parse(val))
        attributes[key] = val
    item.attributes = attributes
    return item
def _item_create(info):
    """Create and save a news item from a GeoJSON Feature dict.

    ``info`` is deep-copied, so the caller's dict is not modified.

    The 'type', 'title', 'description', 'url', 'pub_date', 'item_date' and
    'location_name' properties are fed to NewsItemForm; every remaining
    property is treated as a schema attribute and converted according to
    its SchemaField before being assigned to ``item.attributes``.

    Returns the saved item.

    Raises InvalidNewsItem if ``info`` is not a GeoJSON Feature, if the
    schema slug does not exist, or if the form does not validate.
    """
    info = copy.deepcopy(info)
    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would silently disable this validation.
    try:
        feature_type = info.pop('type')
        props = info['properties']
    except KeyError:
        raise InvalidNewsItem({'type': 'not a valid GeoJSON Feature'})
    if feature_type != 'Feature':
        raise InvalidNewsItem({'type': 'not a valid GeoJSON Feature'})

    try:
        slug = props.pop('type', None)
        schema = models.Schema.objects.get(slug=slug)
    except (models.Schema.DoesNotExist):
        raise InvalidNewsItem({'type': 'schema %r does not exist' % slug})

    data = {'schema': schema.id}
    for key in ('title', 'description', 'url'):
        data[key] = props.pop(key, '')

    # If there are errors parsing the dates, keep the raw data and let
    # the ModelForm sort it out.
    pub_date = props.pop('pub_date', None)
    if pub_date:
        try:
            data['pub_date'] = normalize_datetime(pyrfc3339.parse(pub_date))
        except Exception:
            data['pub_date'] = pub_date
    else:
        data['pub_date'] = normalize_datetime(datetime.datetime.utcnow())

    item_date = props.pop('item_date', None)
    if item_date:
        try:
            data['item_date'] = parse_date(item_date, '%Y-%m-%d', False)
        except Exception:
            data['item_date'] = item_date
    else:
        # Fall back to the date part of pub_date; pub_date may still be a
        # raw unparsed value here, in which case .date() fails.
        try:
            data['item_date'] = data['pub_date'].date()
        except Exception:
            data['item_date'] = None

    data['location'], data['location_name'] = _get_location_info(
        info.get('geometry'), props.pop('location_name', None))
    if not data['location']:
        logger.warning("Saving NewsItem %s with no geometry", data['title'])
    if not data['location_name']:
        logger.warning(
            "Saving NewsItem %s with no location_name", data['title'])

    from ebpub.db.forms import NewsItemForm
    form = NewsItemForm(data)
    if form.is_valid():
        item = form.save()
    else:
        raise InvalidNewsItem(form.errors)

    # Everything else goes in .attributes.
    # NOTE(review): the item is already saved at this point, so an exception
    # raised below (e.g. an unknown field name or an unparseable value)
    # leaves a saved item without attributes -- confirm this is acceptable.
    attributes = {}
    for key, val in props.items():
        sf = models.SchemaField.objects.get(schema=schema, name=key)
        if sf.is_many_to_many_lookup():
            lookups = [
                models.Lookup.objects.get_or_create_lookup(sf, lookup_name)
                for lookup_name in val
            ]
            val = ','.join(str(lookup.id) for lookup in lookups)
        elif sf.is_lookup:
            val = models.Lookup.objects.get_or_create_lookup(sf, val)
        elif sf.is_type('date'):
            val = normalize_datetime(parse_date(val, '%Y-%m-%d'))
        elif sf.is_type('time'):
            val = normalize_datetime(parse_time(val, '%H:%M'))
        elif sf.is_type('datetime'):
            # Parse the attribute value itself; the previous code passed the
            # ``datetime`` module here, which could never succeed.
            val = normalize_datetime(pyrfc3339.parse(val))
        attributes[key] = val
    item.attributes = attributes
    return item
def clean_list_record(self, record):
    """Convert the record's raw string fields to typed values, in place.

    "crime_date" and "crime_time" are parsed with parse_date / parse_time,
    and "lon" / "lat" are converted to floats. Returns the same record.
    """
    crime_date = parse_date(record["crime_date"], "%m-%d-%Y")
    record["crime_date"] = crime_date
    crime_time = parse_time(record["crime_time"], "%I:%M:%S %p")
    record["crime_time"] = crime_time
    # Longitude is converted before latitude, as in the original ordering.
    for coord_key in ("lon", "lat"):
        record[coord_key] = float(record[coord_key])
    return record