Example #1
0
    def save(self, old_record, list_record, detail_record):
        """
        Create a foreclosure-filing NewsItem from ``list_record``.

        Does nothing when ``old_record`` is not None. ``detail_record`` is
        accepted but never read here.
        """
        if old_record is not None:
            return

        # Lookups use the raw value as both of the trailing arguments.
        type_value = list_record['property_type']
        property_type = self.get_or_create_lookup(
            'property_type', type_value, type_value)
        year_value = list_record['year_of_mortgage']
        year_of_mortgage = self.get_or_create_lookup(
            'year_of_mortgage', year_value, year_value)

        street_address = list_record['address']
        headline = 'Foreclosure filed for property in the %s' % address_to_block(
            strip_unit(street_address))

        # (attribute name, list_record key) pairs that are copied verbatim.
        copied_fields = (
            ('original_principal', 'original_i'),
            ('pin_number', 'pin_number'),
            ('filing_date', 'filing_date'),
            ('case_number', 'case_number'),
            ('document_number', 'document_number'),
            ('raw_address', 'address'),
        )
        attributes = dict(
            (name, list_record[key]) for name, key in copied_fields)
        attributes['property_type'] = property_type.id
        attributes['year_of_mortgage'] = year_of_mortgage.id

        self.create_newsitem(
            attributes,
            convert_to_block=True,
            title=headline,
            item_date=list_record['filing_date'],
            location_name=strip_unit(street_address),
        )
Example #2
0
    def save(self, old_record, list_record, detail_record):
        """
        Create a foreclosure-auction NewsItem from ``list_record``.

        Does nothing when ``old_record`` is not None. ``detail_record`` is
        accepted but never read here. The item is dated by the record's
        ``sale_date``.
        """
        if old_record is not None:
            return

        def lookup_for(field):
            # The raw value is passed as both trailing lookup arguments.
            raw_value = list_record[field]
            return self.get_or_create_lookup(field, raw_value, raw_value)

        property_type = lookup_for('property_type')
        year_of_mortgage = lookup_for('year_of_mortgage')
        purchaser = lookup_for('purchaser')

        raw_address = list_record['address']
        block_address = address_to_block(strip_unit(raw_address))
        headline = 'Property on the %s sold at foreclosure auction' % block_address

        attributes = {}
        attributes['original_principal'] = list_record['original_i']
        attributes['property_type'] = property_type.id
        attributes['year_of_mortgage'] = year_of_mortgage.id
        for field in ('pin_number', 'case_number', 'document_number', 'filing_date'):
            attributes[field] = list_record[field]
        attributes['raw_address'] = list_record['address']
        attributes['auction_price'] = list_record['auction_price']
        attributes['purchaser'] = purchaser.id

        sale_date = list_record['sale_date']
        self.create_newsitem(
            attributes,
            convert_to_block=True,
            title=headline,
            item_date=sale_date,
            location_name=strip_unit(raw_address),
        )
    def create_newsitem(self, attributes, **kwargs):
        """
        Build, save and return a NewsItem described by ``kwargs``.

        Required kwargs:
            title
            item_date
            location_name

        Optional kwargs (each falls back to a default when absent):
            schema, description, url, pub_date, location, location_object,
            block, convert_to_block

        When 'location' is not supplied, kwargs['location_name'] is geocoded
        and the resulting point and block are used. When 'convert_to_block'
        is true, the stored location_name is rewritten to block level; the
        block-level name is also geocoded if no location is known by then.

        ``attributes`` is the dictionary assigned to the NewsItem's
        attributes; pass None to leave them untouched.
        """
        geo_block = None
        point = None

        if 'location' not in kwargs:
            hit = self.geocode(kwargs['location_name'])
            if hit:
                geo_block, point = hit['block'], hit['point']
            else:
                # Keep whatever falsy value the geocoder handed back.
                point = hit

        wants_block_level = kwargs.pop('convert_to_block', False)
        if wants_block_level:
            block_name = address_to_block(kwargs['location_name'])
            kwargs['location_name'] = block_name
            if point is None:
                # The exact address gave us nothing, so fall back to the
                # normalized, block-level name.
                hit = self.geocode(block_name)
                if hit:
                    geo_block, point = hit['block'], hit['point']
                else:
                    point = hit

        # self.schema must only be touched when no schema was passed in:
        # evaluating it eagerly raises an error when using multiple schemas.
        schema = kwargs.get('schema', None) or self.schema

        fields = dict(
            schema=schema,
            title=kwargs['title'],
            description=kwargs.get('description', ''),
            url=kwargs.get('url', ''),
            pub_date=kwargs.get('pub_date', self.start_time),
            item_date=kwargs['item_date'],
            location=kwargs.get('location', point),
            location_name=kwargs['location_name'],
            location_object=kwargs.get('location_object', None),
            block=kwargs.get('block', geo_block),
        )
        ni = NewsItem.objects.create(**fields)

        if attributes is not None:
            ni.attributes = attributes

        self.num_added += 1
        self.logger.info(u'Created NewsItem %s (total created in this scrape: %s)', ni.id, self.num_added)
        return ni
Example #4
0
    def create_newsitem(self, attributes, **kwargs):
        """
        Creates and saves a NewsItem with the given kwargs. Returns the new
        NewsItem.

        kwargs MUST have the following keys:
            title
            item_date
            location_name
        For any other kwargs whose values aren't provided (schema,
        description, url, pub_date, location, location_object, block), this
        will use sensible defaults.

        If 'location' is not given, kwargs['location_name'] is geocoded and
        the resulting point and block are used.

        kwargs may optionally contain a 'convert_to_block' boolean. If True,
        this will convert the given kwargs['location_name'] to a block level
        but will use the real (non-block-level) address for geocoding and Block
        association. The block-level name is geocoded only as a fallback.

        attributes is a dictionary to use to populate this NewsItem's Attribute
        object. It may be None, in which case no attributes are assigned.
        """
        block = location = None
        if 'location' not in kwargs:
            location = self.geocode(kwargs['location_name'])
            if location:
                block = location['block']
                location = location['point']
        if kwargs.pop('convert_to_block', False):
            kwargs['location_name'] = address_to_block(kwargs['location_name'])
            # If the exact address couldn't be geocoded, try using the
            # normalized location name.
            if location is None:
                location = self.geocode(kwargs['location_name'])
                if location:
                    block = location['block']
                    location = location['point']

        # Normally we'd just use "schema = kwargs.get('schema', self.schema)",
        # but self.schema will be evaluated even if the key is found in
        # kwargs, which raises an error when using multiple schemas.
        schema = kwargs.get('schema', None) or self.schema

        ni = NewsItem.objects.create(
            schema=schema,
            title=kwargs['title'],
            description=kwargs.get('description', ''),
            url=kwargs.get('url', ''),
            pub_date=kwargs.get('pub_date', self.start_time),
            item_date=kwargs['item_date'],
            location=kwargs.get('location', location),
            location_name=kwargs['location_name'],
            location_object=kwargs.get('location_object', None),
            block=kwargs.get('block', block),
        )
        # Callers with nothing to record may pass None; skip the assignment
        # rather than handing None to the attributes setter.
        if attributes is not None:
            ni.attributes = attributes
        self.num_added += 1
        self.logger.info(u'Created NewsItem %s (total created in this scrape: %s)', ni.id, self.num_added)
        return ni
Example #5
0
    def normalize_address(self, record):
        """
        Turn a raw offense record into a block-level address string.

        Streets arrive with no spaces between name and suffix, so known
        suffixes are peeled off the end and the remaining part is compared
        to the streets mapping (self.streets). If nothing matches, the raw
        street text is used unchanged with no suffix.

        Raises SkipRecord when the record has no street.
        """
        raw_street = record['offensestreet']
        if raw_street is None:
            raise SkipRecord('Skipping record with no street')

        suffix_groups = [
            ('EXPWY', 'PKWY', 'FRWY', 'BLVD'),
            ('HWY', 'FRW', 'AVE', 'CIR', 'EXT', 'BLV', 'PKW', 'ROW', 'WAY', 'EXP'),
            ('DR', 'ST', 'RD', 'LN', 'BL', 'TR', 'WY', 'CT', 'PL', 'AV', 'CI'),
            ('P', 'R', 'F', 'D', 'S', 'L')
        ]
        # Longer suffixes come first, so one flat ordered list is enough.
        candidates = [sfx for group in suffix_groups for sfx in group]

        street = raw_street
        matching_suffix = ''
        match_found = False

        for sfx in candidates:
            if not raw_street.endswith(sfx):
                continue
            stem = raw_street[:-len(sfx)]
            # Keys into the mapping of collapsed street names. SAINT is
            # encoded as ST in the data but spelled out in the streets table,
            # so a leading ST earns a second attempt.
            lookup_keys = [stem]
            if stem.startswith('ST'):
                lookup_keys.append('SAINT%s' % stem[2:])
            for key in lookup_keys:
                try:
                    street = self.streets[key]
                except KeyError:
                    continue
                matching_suffix = sfx
                match_found = True
                break
            if match_found:
                break

        if match_found:
            self.matches_found += 1
        self.records_seen += 1

        block_number = record['offenseblock'].lstrip('0')
        if block_number.endswith('xx'):
            block_number = block_number.replace('xx', '00')
        direction = record['offensedirection'] or ''
        address = '%s %s %s %s' % (block_number, direction, street, matching_suffix)
        # Empty parts leave doubled spaces behind; squeeze them out.
        address = re.sub(r'\s+', ' ', address)
        return address_to_block(address)
Example #6
0
    def normalize_address(self, record):
        """
        Produce a block-level address from a raw offense record.

        The street field has its name and suffix run together, so each known
        suffix is tried in turn and the remaining part is looked up in
        self.streets. Without a match the raw street text is kept and no
        suffix is emitted.

        Raises SkipRecord when record['offensestreet'] is None.
        """
        original = record['offensestreet']
        if original is None:
            raise SkipRecord('Skipping record with no street')

        street = original
        matching_suffix = ''
        match_found = False

        suffix_groups = [
            ('EXPWY', 'PKWY', 'FRWY', 'BLVD'),
            ('HWY', 'FRW', 'AVE', 'CIR', 'EXT', 'BLV', 'PKW', 'ROW', 'WAY',
             'EXP'),
            ('DR', 'ST', 'RD', 'LN', 'BL', 'TR', 'WY', 'CT', 'PL', 'AV',
             'CI'),
            ('P', 'R', 'F', 'D', 'S', 'L'),
        ]

        for group in suffix_groups:
            for suffix in group:
                if not original.endswith(suffix):
                    continue
                stem = original[:-len(suffix)]
                try:
                    # Mapping of collapsed street names to names in the
                    # streets table.
                    street = self.streets[stem]
                except KeyError:
                    # SAINT is encoded as ST in the data, but Saint in the
                    # streets table, so retry with that spelling.
                    if not stem.startswith('ST'):
                        continue
                    try:
                        street = self.streets['SAINT%s' % stem[2:]]
                    except KeyError:
                        continue
                matching_suffix = suffix
                match_found = True
                break
            else:
                # Nothing in this group matched; move on to the next one.
                continue
            break

        if match_found:
            self.matches_found += 1
        self.records_seen += 1

        normalized_block = record['offenseblock'].lstrip('0')
        if normalized_block[-2:] == 'xx':
            normalized_block = normalized_block.replace('xx', '00')

        pieces = (normalized_block, record['offensedirection'] or '', street,
                  matching_suffix)
        address = re.sub(r'\s+', ' ', '%s %s %s %s' % pieces)
        return address_to_block(address)
Example #7
0
    def normalize_address(self, record):
        """
        Convert a raw offense record into a block-level address.

        Addresses are provided with no spaces, so each known suffix is
        stripped from the end of the street and the remaining part is looked
        up in self.streets. When no lookup succeeds the raw street text is
        used as-is and the suffix is left empty.

        Raises SkipRecord if the record carries no street.
        """
        raw = record["offensestreet"]
        if raw is None:
            raise SkipRecord("Skipping record with no street")

        suffix_groups = [
            ("EXPWY", "PKWY", "FRWY", "BLVD"),
            ("HWY", "FRW", "AVE", "CIR", "EXT", "BLV", "PKW", "ROW", "WAY", "EXP"),
            ("DR", "ST", "RD", "LN", "BL", "TR", "WY", "CT", "PL", "AV", "CI"),
            ("P", "R", "F", "D", "S", "L"),
        ]

        def candidate_keys():
            # Yield (suffix, streets-key) pairs in the order they are tried.
            for group in suffix_groups:
                for suffix in group:
                    if not raw.endswith(suffix):
                        continue
                    stem = raw[: -len(suffix)]
                    yield suffix, stem
                    # SAINT is encoded as ST in the data, but Saint in the
                    # streets table, so offer that spelling as well.
                    if stem.startswith("ST"):
                        yield suffix, "SAINT%s" % stem[2:]

        street = raw
        matching_suffix = ""
        match_found = False
        for suffix, key in candidate_keys():
            try:
                street = self.streets[key]
            except KeyError:
                continue
            matching_suffix = suffix
            match_found = True
            break

        if match_found:
            self.matches_found += 1
        self.records_seen += 1

        block_part = record["offenseblock"].lstrip("0")
        if block_part[-2:] == "xx":
            block_part = block_part.replace("xx", "00")
        direction_part = record["offensedirection"] or ""

        address = "%s %s %s %s" % (block_part, direction_part, street, matching_suffix)
        address = re.sub(r"\s+", " ", address)
        return address_to_block(address)
Example #8
0
    def clean_list_record(self, record):
        """
        Normalize the fields of a scraped list record in place.

        Sets record['raw_address'] (title-cased, entity-decoded address),
        rewrites record['address'] to its block-level form, cleans
        record['disposition'] and record['event'], and splits
        record['datetime'] into record['item_date'] and record['item_time'].

        Returns the same record object.
        """
        def unescape(text):
            # NOTE(review): these replacements previously mapped '&' to '&'
            # and ' ' to ' ', i.e. they did nothing. That looks like the
            # entity literals themselves were decoded at some point, so they
            # are restored here -- confirm against the live feed.
            return text.replace('&amp;', '&').replace('&nbsp;', ' ')

        # Save the raw address so we can use it to find duplicate records in
        # the future.
        address = smart_title(unescape(record['address'].strip())).strip()
        record['raw_address'] = address
        record['address'] = address_to_block(clean_address(address))

        # An empty disposition is reported with a placeholder.
        record['disposition'] = unescape(record['disposition']).strip() or 'Not available'
        record['event'] = unescape(record['event']).strip()
        item_date = parse_date(record['datetime'], '%m/%d/%Y %I:%M:%S %p', return_datetime=True)
        record['item_date'] = item_date.date()
        record['item_time'] = item_date.time()

        # Normalize this value.
        if record['disposition'] == 'CANCCOMM':
            record['disposition'] = 'CANCELLED BY COMMUNICATIONS'

        return record
Example #9
0
            return {'type': 'address', 'result': results, 'ambiguous': True}
        else:
            return {'type': 'address', 'result': results[0], 'ambiguous': False}

    except InvalidBlockButValidStreet, e:
        result = {
            'type': 'block',
            'ambiguous': True,
            'result': e.block_list,
            'street_name': e.street_name,
            'block_number': e.block_number,
            }
        if convert_to_block:
            # If the exact address couldn't be geocoded, try using the
            # normalized block name.
            block_name = address_to_block(query)
            if block_name != query:
                try:
                    result['result'] = BlockGeocoder()._do_geocode(block_name)
                    result['result']['address'] = block_name
                    result['ambiguous'] = False
                    logger.debug('Resolved %r to block %r' % (query, block_name))
                except (InvalidBlockButValidStreet, AmbiguousResult):
                    pass
        if result['ambiguous']:
            logger.debug('Invalid block for %r, returning all possible blocks' % query)
        return result

    except:
        raise
Example #10
0
    def create_newsitem(self, attributes, **kwargs):
        """
        Creates and saves a NewsItem with the given kwargs. Returns the new
        NewsItem.

        kwargs MUST have the following keys:
        *   title
        *   item_date
        *   location_name AND/OR location

        For any other kwargs whose values aren't provided (schema,
        description, url, pub_date, location_object), this will use
        sensible defaults.

        ``attributes`` is a dictionary to use to populate this
        NewsItem's Attribute objects. It may be None, in which case
        no attributes are assigned.

        kwargs MAY have the following keys:

            zipcode, city, and/or state

              used to disambiguate geocoded locations.

            convert_to_block

              convert the given kwargs['location_name']
              to a block level but will try to use the real
              (non-block-level) address for geocoding.
              Default False. Has no effect on the name when no
              location_name is available.

        An AssertionError is raised if, after self.geocode_if_needed(),
        neither a location nor a location_name is available.
        """

        convert_to_block = kwargs.pop('convert_to_block', False)
        location, location_name = self.geocode_if_needed(
            kwargs.get('location', None),
            kwargs.get('location_name', None),
            zipcode=kwargs.pop('zipcode', None),
            city=kwargs.pop('city', None),
            state=kwargs.pop('state', None),
            convert_to_block=convert_to_block,
        )

        assert location or location_name, "At least one of location or location_name must be provided"

        # The assert above is satisfied by a location alone, so the name may
        # still be missing here; only convert a name that actually exists.
        if convert_to_block and location_name:
            location_name = address_to_block(location_name)

        # Normally we'd just use "schema = kwargs.get('schema', self.schema)",
        # but self.schema will be evaluated even if the key is found in
        # kwargs, which raises an error when using multiple schemas.
        schema = kwargs.get('schema', None) or self.schema

        ni = NewsItem.objects.create(
            schema=schema,
            title=kwargs['title'],
            description=kwargs.get('description', ''),
            url=kwargs.get('url', ''),
            pub_date=kwargs.get('pub_date', self.start_time),
            item_date=kwargs['item_date'],
            location=location,
            location_name=location_name,
            location_object=kwargs.get('location_object', None),
        )
        if attributes is not None:
            ni.attributes = attributes
        self.num_added += 1
        self.logger.info(
            u'Created NewsItem %s: %s (total created in this scrape: %s)',
            schema.slug, ni.id, self.num_added)
        return ni
Example #11
0
                'result': results[0],
                'ambiguous': False
            }

    except InvalidBlockButValidStreet, e:
        result = {
            'type': 'block',
            'ambiguous': True,
            'result': e.block_list,
            'street_name': e.street_name,
            'block_number': e.block_number,
        }
        if convert_to_block:
            # If the exact address couldn't be geocoded, try using the
            # normalized block name.
            block_name = address_to_block(query)
            if block_name != query:
                try:
                    result['result'] = BlockGeocoder()._do_geocode(block_name)
                    result['result']['address'] = block_name
                    result['ambiguous'] = False
                    logger.debug('Resolved %r to block %r' %
                                 (query, block_name))
                except (InvalidBlockButValidStreet, AmbiguousResult):
                    pass
        if result['ambiguous']:
            logger.debug(
                'Invalid block for %r, returning all possible blocks' % query)
        return result

    except:
Example #12
0
    def create_newsitem(self, attributes, **kwargs):
        """
        Creates and saves a NewsItem with the given kwargs. Returns the new
        NewsItem.

        kwargs MUST have the following keys:
        *   title
        *   item_date
        *   location_name AND/OR location

        For any other kwargs whose values aren't provided (schema,
        description, url, pub_date, location_object), this will use
        sensible defaults.

        ``attributes`` is a dictionary to use to populate this
        NewsItem's Attribute objects. Passing None skips the
        attribute assignment.

        kwargs MAY have the following keys:

            zipcode, city, and/or state

              used to disambiguate geocoded locations.

            convert_to_block

              convert the given kwargs['location_name']
              to a block level but will try to use the real
              (non-block-level) address for geocoding.
              Default False. Ignored for the name when there is no
              location_name to convert.

        Raises AssertionError when self.geocode_if_needed() yields
        neither a location nor a location_name.
        """

        convert_to_block = kwargs.pop('convert_to_block', False)
        location, location_name = self.geocode_if_needed(
            kwargs.get('location', None),
            kwargs.get('location_name', None),
            zipcode=kwargs.pop('zipcode', None),
            city=kwargs.pop('city', None),
            state=kwargs.pop('state', None),
            convert_to_block=convert_to_block,
            )

        assert location or location_name, "At least one of location or location_name must be provided"

        # A location without a name passes the assert, so guard against
        # handing a missing name to address_to_block().
        if convert_to_block and location_name:
            location_name = address_to_block(location_name)

        # Normally we'd just use "schema = kwargs.get('schema', self.schema)",
        # but self.schema will be evaluated even if the key is found in
        # kwargs, which raises an error when using multiple schemas.
        schema = kwargs.get('schema', None) or self.schema

        ni = NewsItem.objects.create(
            schema=schema,
            title=kwargs['title'],
            description=kwargs.get('description', ''),
            url=kwargs.get('url', ''),
            pub_date=kwargs.get('pub_date', self.start_time),
            item_date=kwargs['item_date'],
            location=location,
            location_name=location_name,
            location_object=kwargs.get('location_object', None),
        )
        if attributes is not None:
            ni.attributes = attributes
        self.num_added += 1
        self.logger.info(u'Created NewsItem %s: %s (total created in this scrape: %s)', schema.slug, ni.id, self.num_added)
        return ni